Context Navigation

source: trunk/third/gcc/config/arm/lib1funcs.asm @ 8834

Visit:

Revision 8834, 30.0 KB checked in by ghudson, 28 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r8833, which included commits to RCS files with non-trunk default branches.

Line
1	@ libgcc1 routines for ARM cpu.
2	@ Division and remainder, from Appendix E of the Sparc Version 8
3	@ Architecture Manual, with fixes from Gordon Irlam.
4	@ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
5
6	/* Copyright (C) 1995 Free Software Foundation, Inc.
7
8	This file is free software; you can redistribute it and/or modify it
9	under the terms of the GNU General Public License as published by the
10	Free Software Foundation; either version 2, or (at your option) any
11	later version.
12
13	In addition to the permissions in the GNU General Public License, the
14	Free Software Foundation gives you unlimited permission to link the
15	compiled version of this file with other programs, and to distribute
16	those programs without any restriction coming from the use of this
17	file. (The General Public License restrictions do apply in other
18	respects; for example, they cover modification of the file, and
19	distribution when not linked into another program.)
20
21	This file is distributed in the hope that it will be useful, but
22	WITHOUT ANY WARRANTY; without even the implied warranty of
23	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24	General Public License for more details.
25
26	You should have received a copy of the GNU General Public License
27	along with this program; see the file COPYING. If not, write to
28	the Free Software Foundation, 59 Temple Place - Suite 330,
29	Boston, MA 02111-1307, USA. */
30
31	/* As a special exception, if you link this library with other files,
32	some of which are compiled with GCC, to produce an executable,
33	this library does not by itself cause the resulting executable
34	to be covered by the GNU General Public License.
35	This exception does not however invalidate any other reasons why
36	the executable file might be covered by the GNU General Public License. */
37
38	/*
39	* Input: dividend and divisor in r0 and r1 respectively.
40	*
41	* m4 parameters:
42	* NAME name of function to generate
43	* OP OP=div => r0 / r1; OP=mod => r0 % r1
44	* S S=true => signed; S=false => unsigned
45	*
46	* Algorithm parameters:
47	* N how many bits per iteration we try to get (4)
48	* WORDSIZE total number of bits (32)
49	*
50	* Derived constants:
51	* TOPBITS number of bits in the top `decade' of a number
52	*
53	* Important variables:
54	* Q the partial quotient under development (initially 0)
55	* R the remainder so far, initially the dividend
56	* ITER number of main division loop iterations required;
57	* equal to ceil(log2(quotient) / N). Note that this
58	* is the log base (2^N) of the quotient.
59	* V the current comparand, initially divisor*2^(ITER*N-1)
60	*
61	* Cost:
62	* Current estimate for non-large dividend is
63	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
64	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
65	* different path, as the upper bits of the quotient must be developed
66	* one bit at a time.
67	*/
68
69	/*
70	define(N, `4')dnl
71	define(WORDSIZE, `32')dnl
72	define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
73	dnl
74	define(dividend, `r0')dnl
75	define(divisor, `r1')dnl
76	define(Q, `r2')dnl
77	define(R, `r3')dnl
78	define(ITER, `ip')dnl
79	define(V, `lr')dnl
80	dnl
81	dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
82	define(T, `r4')dnl
83	define(SC, `r5')dnl
84	ifelse(S, `true', `define(SIGN, `r6')')dnl
85	define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl
86	define(ret, `ldmia sp!, REGLIST pc}')dnl
87	dnl
88	dnl This is the recursive definition for developing quotient digits.
89	dnl
90	dnl Parameters:
91	dnl $1 the current depth, 1 <= $1 <= N
92	dnl $2 the current accumulation of quotient bits
93	dnl N max depth
94	dnl
95	dnl We add a new bit to $2 and either recurse or insert the bits in
96	dnl the quotient. R, Q, and V are inputs and outputs as defined above;
97	dnl the condition codes are expected to reflect the input R, and are
98	dnl modified to reflect the output R.
99	dnl
100	define(DEVELOP_QUOTIENT_BITS,
101	` @ depth $1, accumulated bits $2
102	mov V, V, lsr #1
103	blt L.$1.eval(2^N+$2+999)
104	@ remainder is positive
105	subs R, R, V
106	ifelse($1, N,
107	` ifelse(eval(2*$2+1<0), `0',
108	`add Q, Q, `#'eval($2*2+1)',
109	`sub Q, Q, `#'eval(-($2*2+1))')
110
111	b 9f
112	', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
113	L.$1.eval(2^N+$2+999):
114	@ remainder is negative
115	adds R, R, V
116	ifelse($1, N,
117	` ifelse(eval(2*$2-1<0), `0',
118	`add Q, Q, `#'eval($2*2-1)',
119	`sub Q, Q, `#'eval(-($2*2-1))')
120	b 9f
121
122	', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
123	ifelse($1, 1, `9:')')dnl
124
125	#include "trap.h"
126
127	ip .req r12
128	sp .req r13
129	lr .req r14
130	pc .req r15
131	.text
132	.globl NAME
133	.align 0
134	NAME:
135	stmdb sp!, REGLIST lr}
136	ifelse(S, `true',
137	` @ compute sign of result; if neither is negative, no problem
138	ifelse(OP, `div', `eor SIGN, divisor, dividend @ compute sign',
139	`mov SIGN, dividend')
140	cmp divisor, #0
141	rsbmi divisor, divisor, #0
142	beq Ldiv_zero
143	mov V, divisor
144	movs R, dividend
145	rsbmi R, R, #0 @ make dividend nonnegative
146	',
147	` @ Ready to divide. Compute size of quotient; scale comparand.
148	movs V, divisor
149	mov R, dividend
150	beq Ldiv_zero
151	')
152
153	cmp R, V @ if divisor exceeds dividend, done
154	mov Q, #0
155	bcc Lgot_result @ (and algorithm fails otherwise)
156	mov T, `#'(1 << (WORDSIZE - TOPBITS - 1))
157	cmp R, T
158	mov ITER, #0
159	bcc Lnot_really_big
160
161	@ `Here the dividend is >= 2^(31-N) or so. We must be careful here,
162	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
163	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
164	@ Compute ITER in an unorthodox manner: know we need to shift V into
165	@ the top decade: so do not even bother to compare to R.'
166	mov SC, #1
167	1:
168	cmp V, T
169	bcs 3f
170	mov V, V, lsl `#'N
171	add ITER, ITER, #1
172	b 1b
173
174	@ Now compute SC.
175	2: adds V, V, V
176	add SC, SC, #1
177	bcc Lnot_too_big
178
179	@ We get here if the divisor overflowed while shifting.
180	@ This means that R has the high-order bit set.
181	@ Restore V and subtract from R.
182	mov T, T, lsl `#'TOPBITS
183	mov V, V, lsr #1
184	add V, T, V
185	sub SC, SC, #1
186	b Ldo_single_div
187
188	Lnot_too_big:
189	3: cmp V, R
190	bcc 2b
191	@ beq Ldo_single_div
192
193	/-* NB: these are commented out in the V8-Sparc manual as well *-/
194	/-* (I do not understand this) *-/
195	@ V > R: went too far: back up 1 step
196	@ srl V, 1, V
197	@ dec SC
198	@ do single-bit divide steps
199	@
200	@ We have to be careful here. We know that R >= V, so we can do the
201	@ first divide step without thinking. BUT, the others are conditional,
202	@ and are only done if R >= 0. Because both R and V may have the high-
203	@ order bit set in the first step, just falling into the regular
204	@ division loop will mess up the first time around.
205	@ So we unroll slightly...
206	Ldo_single_div:
207	subs SC, SC, #1
208	blt Lend_regular_divide
209	sub R, R, V
210	mov Q, #1
211	b Lend_single_divloop
212	Lsingle_divloop:
213	cmp R, #0
214	mov Q, Q, lsl #1
215	mov V, V, lsr #1
216	@ R >= 0
217	subpl R, R, V
218	addpl Q, Q, #1
219	@ R < 0
220	addmi R, R, V
221	submi Q, Q, #1
222	Lend_single_divloop:
223	subs SC, SC, #1
224	bge Lsingle_divloop
225	b Lend_regular_divide
226
227	1:
228	add ITER, ITER, #1
229	Lnot_really_big:
230	mov V, V, lsl `#'N
231	cmp V, R
232	bls 1b
233	@
234	@ HOW CAN ITER EVER BE -1 HERE ?????
235	@
236	cmn ITER, #1
237	beq Lgot_result
238
239	Ldivloop:
240	cmp R, #0 @ set up for initial iteration
241	mov Q, Q, lsl `#'N
242	DEVELOP_QUOTIENT_BITS(1, 0)
243	Lend_regular_divide:
244	subs ITER, ITER, #1
245	bge Ldivloop
246	cmp R, #0
247	@ non-restoring fixup here (one instruction only!)
248	ifelse(OP, `div',
249	` sublt Q, Q, #1
250	', ` addlt R, divisor, R
251	')
252
253	Lgot_result:
254	ifelse(S, `true',
255	` @ check to see if answer should be < 0
256	cmp SIGN, #0
257	ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0')
258	')
259	ifelse(OP, `div', `mov r0, Q', `mov r0, R')
260	ret
261
262	Ldiv_zero:
263	@ Divide by zero trap. If it returns, return 0 (about as
264	@ wrong as possible, but that is what SunOS does...).
265	bl ___div0
266	mov r0, #0
267	ret
268	*/
269
270	#ifdef L_udivsi3
271	@ ___udivsi3: unsigned 32-bit divide. In: r0 = dividend, r1 = divisor. Out: r0 = quotient. Matches the m4 template in this file with OP=div, S=false.
272	ip .req r12 @ ip is ITER: count of 4-bit divide passes still to run
273	sp .req r13
274	lr .req r14 @ lr is V: the shifted comparand (the return address is kept on the stack)
275	pc .req r15
276	.text
277	.globl ___udivsi3
278	.align 0
279	___udivsi3:
280	stmdb sp!, {r4, r5, lr} @ save r4 (T), r5 (SC) and the return address
281	@ Ready to divide. Compute size of quotient; scale comparand.
282	movs lr, r1 @ V = divisor; Z is set when the divisor is zero
283	mov r3, r0 @ R = dividend (plain mov keeps the flags from movs for the beq)
284	beq Ldiv_zero
285
286
287	cmp r3, lr @ if r1 exceeds r0, done
288	mov r2, #0 @ Q = 0
289	bcc Lgot_result @ (and algorithm fails otherwise)
290	mov r4, #(1 << (32 - 4 - 1)) @ T = 2^27
291	cmp r3, r4
292	mov ip, #0 @ ITER = 0
293	bcc Lnot_really_big @ R < 2^27 (unsigned): use the plain 4-bits-per-pass path
294
295	@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
296	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
297	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
298	@ Compute ITER in an unorthodox manner: know we need to shift V into
299	@ the top decade: so do not even bother to compare to R.
300	mov r5, #1 @ SC = 1
301	1: @ shift V left 4 bits at a time until V >= T, counting the shifts in ITER
302	cmp lr, r4
303	bcs 3f
304	mov lr, lr, lsl #4
305	add ip, ip, #1
306	b 1b
307
308	@ Now compute r5 (SC): double V, counting each doubling, while V is below R.
309	2: adds lr, lr, lr @ V = 2*V; carry is set when a bit falls out of bit 31
310	add r5, r5, #1 @ SC += 1 (plain add keeps the carry from adds for the bcc)
311	bcc Lnot_too_big
312
313	@ We get here if lr (the shifted divisor) overflowed while shifting.
314	@ This means that r3 has the high-order bit set.
315	@ Restore lr and subtract from r3.
316	mov r4, r4, lsl #4 @ T = 2^27 << 4 = 2^31
317	mov lr, lr, lsr #1 @ halve V again; the bit that overflowed is still missing
318	add lr, r4, lr @ V = 2^31 + V, which puts the lost top bit back
319	sub r5, r5, #1 @ uncount the doubling that overflowed
320	b Ldo_single_div
321
322	Lnot_too_big:
323	3: cmp lr, r3
324	bcc 2b @ V < R (unsigned): keep doubling
325	@ beq Ldo_single_div
326
327	/* NB: these are commented out in the V8-Sparc manual as well */
328	/* (I do not understand this) */
329	@ lr > r3: went too far: back up 1 step
330	@ srl lr, 1, lr
331	@ dec r5
332	@ do single-bit divide steps
333	@
334	@ We have to be careful here. We know that r3 >= lr, so we can do the
335	@ first divide step without thinking. BUT, the others are conditional,
336	@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
337	@ order bit set in the first step, just falling into the regular
338	@ division loop will mess up the first time around.
339	@ So we unroll slightly...
340	Ldo_single_div:
341	subs r5, r5, #1
342	blt Lend_regular_divide @ SC went negative: skip the single-bit steps
343	sub r3, r3, lr @ first step is unconditional: R -= V
344	mov r2, #1 @ Q = 1
345	b Lend_single_divloop
346	Lsingle_divloop: @ one single-bit step per pass: Q is shifted left one bit and V is halved
347	cmp r3, #0 @ N flag = sign of R; selects the pl pair or the mi pair below
348	mov r2, r2, lsl #1
349	mov lr, lr, lsr #1
350	@ r3 >= 0
351	subpl r3, r3, lr
352	addpl r2, r2, #1
353	@ r3 < 0
354	addmi r3, r3, lr
355	submi r2, r2, #1
356	Lend_single_divloop:
357	subs r5, r5, #1
358	bge Lsingle_divloop @ loop until SC goes negative
359	b Lend_regular_divide
360
361	1:
362	add ip, ip, #1 @ ITER += 1
363	Lnot_really_big:
364	mov lr, lr, lsl #4 @ V <<= 4
365	cmp lr, r3
366	bls 1b @ V <= R (unsigned): count one more pass and shift again
367	@
368	@ HOW CAN ip EVER BE -1 HERE ?????
369	@
370	cmn ip, #1 @ Z is set when ip == -1 (cmn sets flags from ip + 1)
371	beq Lgot_result
372
373	Ldivloop: @ each pass develops 4 quotient bits: four halve-V-then-add-or-subtract steps, then one odd digit in -15..+15 is added to Q
374	cmp r3, #0 @ set up for initial iteration
375	mov r2, r2, lsl #4 @ make room in Q for the 4 bits developed below
376	@ depth 1, accumulated bits 0
377	mov lr, lr, lsr #1
378	blt L.1.1015 @ taken when R < 0; the flags are still those of the cmp since plain mov does not set them
379	@ remainder is positive
380	subs r3, r3, lr
381	@ depth 2, accumulated bits 1
382	mov lr, lr, lsr #1
383	blt L.2.1016
384	@ remainder is positive
385	subs r3, r3, lr
386	@ depth 3, accumulated bits 3
387	mov lr, lr, lsr #1
388	blt L.3.1018
389	@ remainder is positive
390	subs r3, r3, lr
391	@ depth 4, accumulated bits 7
392	mov lr, lr, lsr #1
393	blt L.4.1022
394	@ remainder is positive
395	subs r3, r3, lr
396	add r2, r2, #15
397
398	b 9f
399
400	L.4.1022:
401	@ remainder is negative
402	adds r3, r3, lr
403	add r2, r2, #13
404	b 9f
405
406
407
408	L.3.1018:
409	@ remainder is negative
410	adds r3, r3, lr
411	@ depth 4, accumulated bits 5
412	mov lr, lr, lsr #1
413	blt L.4.1020
414	@ remainder is positive
415	subs r3, r3, lr
416	add r2, r2, #11
417
418	b 9f
419
420	L.4.1020:
421	@ remainder is negative
422	adds r3, r3, lr
423	add r2, r2, #9
424	b 9f
425
426
427
428
429	L.2.1016:
430	@ remainder is negative
431	adds r3, r3, lr
432	@ depth 3, accumulated bits 1
433	mov lr, lr, lsr #1
434	blt L.3.1016
435	@ remainder is positive
436	subs r3, r3, lr
437	@ depth 4, accumulated bits 3
438	mov lr, lr, lsr #1
439	blt L.4.1018
440	@ remainder is positive
441	subs r3, r3, lr
442	add r2, r2, #7
443
444	b 9f
445
446	L.4.1018:
447	@ remainder is negative
448	adds r3, r3, lr
449	add r2, r2, #5
450	b 9f
451
452
453
454	L.3.1016:
455	@ remainder is negative
456	adds r3, r3, lr
457	@ depth 4, accumulated bits 1
458	mov lr, lr, lsr #1
459	blt L.4.1016
460	@ remainder is positive
461	subs r3, r3, lr
462	add r2, r2, #3
463
464	b 9f
465
466	L.4.1016:
467	@ remainder is negative
468	adds r3, r3, lr
469	add r2, r2, #1
470	b 9f
471
472
473
474
475
476	L.1.1015:
477	@ remainder is negative
478	adds r3, r3, lr
479	@ depth 2, accumulated bits -1
480	mov lr, lr, lsr #1
481	blt L.2.1014
482	@ remainder is positive
483	subs r3, r3, lr
484	@ depth 3, accumulated bits -1
485	mov lr, lr, lsr #1
486	blt L.3.1014
487	@ remainder is positive
488	subs r3, r3, lr
489	@ depth 4, accumulated bits -1
490	mov lr, lr, lsr #1
491	blt L.4.1014
492	@ remainder is positive
493	subs r3, r3, lr
494	sub r2, r2, #1
495
496	b 9f
497
498	L.4.1014:
499	@ remainder is negative
500	adds r3, r3, lr
501	sub r2, r2, #3
502	b 9f
503
504
505
506	L.3.1014:
507	@ remainder is negative
508	adds r3, r3, lr
509	@ depth 4, accumulated bits -3
510	mov lr, lr, lsr #1
511	blt L.4.1012
512	@ remainder is positive
513	subs r3, r3, lr
514	sub r2, r2, #5
515
516	b 9f
517
518	L.4.1012:
519	@ remainder is negative
520	adds r3, r3, lr
521	sub r2, r2, #7
522	b 9f
523
524
525
526
527	L.2.1014:
528	@ remainder is negative
529	adds r3, r3, lr
530	@ depth 3, accumulated bits -3
531	mov lr, lr, lsr #1
532	blt L.3.1012
533	@ remainder is positive
534	subs r3, r3, lr
535	@ depth 4, accumulated bits -5
536	mov lr, lr, lsr #1
537	blt L.4.1010
538	@ remainder is positive
539	subs r3, r3, lr
540	sub r2, r2, #9
541
542	b 9f
543
544	L.4.1010:
545	@ remainder is negative
546	adds r3, r3, lr
547	sub r2, r2, #11
548	b 9f
549
550
551
552	L.3.1012:
553	@ remainder is negative
554	adds r3, r3, lr
555	@ depth 4, accumulated bits -7
556	mov lr, lr, lsr #1
557	blt L.4.1008
558	@ remainder is positive
559	subs r3, r3, lr
560	sub r2, r2, #13
561
562	b 9f
563
564	L.4.1008:
565	@ remainder is negative
566	adds r3, r3, lr
567	sub r2, r2, #15
568	b 9f
569
570
571
572
573
574	9: @ common exit of the 16 leaves above
575	Lend_regular_divide:
576	subs ip, ip, #1
577	bge Ldivloop @ run another 4-bit pass while ITER has not gone negative
578	cmp r3, #0
579	@ non-restoring fixup here (one instruction only!)
580	sublt r2, r2, #1 @ remainder ended negative: step the quotient down by one
581
582
583	Lgot_result:
584
585	mov r0, r2 @ return Q
586	ldmia sp!, {r4, r5, pc} @ restore r4, r5 and return by loading the saved lr into pc
587
588	Ldiv_zero:
589	@ Divide by zero trap. If it returns, return 0 (about as
590	@ wrong as possible, but that is what SunOS does...).
591	bl ___div0 @ bl overwrites lr; the return address was already pushed at entry
592	mov r0, #0
593	ldmia sp!, {r4, r5, pc}
594
595	#endif /* L_udivsi3 */
596
597	#ifdef L_divsi3
598	@ ___divsi3: signed 32-bit divide. In: r0 = dividend, r1 = divisor. Out: r0 = quotient. Matches the m4 template in this file with OP=div, S=true.
599	ip .req r12 @ ip is ITER: count of 4-bit divide passes still to run
600	sp .req r13
601	lr .req r14 @ lr is V: the shifted comparand (the return address is kept on the stack)
602	pc .req r15
603	.text
604	.globl ___divsi3
605	.align 0
606	___divsi3:
607	stmdb sp!, {r4, r5, r6, lr} @ save r4 (T), r5 (SC), r6 (SIGN) and the return address
608	@ compute sign of result; if neither is negative, no problem
609	eor r6, r1, r0 @ compute sign: bit 31 of r6 is set when the operand signs differ
610	cmp r1, #0
611	rsbmi r1, r1, #0 @ negate a negative divisor (plain rsb keeps the cmp flags for the beq)
612	beq Ldiv_zero
613	mov lr, r1 @ V = divisor, now nonnegative
614	movs r3, r0 @ R = dividend; N is set when it is negative
615	rsbmi r3, r3, #0 @ make dividend nonnegative
616
617
618	cmp r3, lr @ if r1 exceeds r0, done
619	mov r2, #0 @ Q = 0
620	bcc Lgot_result @ (and algorithm fails otherwise)
621	mov r4, #(1 << (32 - 4 - 1)) @ T = 2^27
622	cmp r3, r4
623	mov ip, #0 @ ITER = 0
624	bcc Lnot_really_big @ R < 2^27 (unsigned): use the plain 4-bits-per-pass path
625
626	@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
627	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
628	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
629	@ Compute ITER in an unorthodox manner: know we need to shift V into
630	@ the top decade: so do not even bother to compare to R.
631	mov r5, #1 @ SC = 1
632	1: @ shift V left 4 bits at a time until V >= T, counting the shifts in ITER
633	cmp lr, r4
634	bcs 3f
635	mov lr, lr, lsl #4
636	add ip, ip, #1
637	b 1b
638
639	@ Now compute r5 (SC): double V, counting each doubling, while V is below R.
640	2: adds lr, lr, lr @ V = 2*V; carry is set when a bit falls out of bit 31
641	add r5, r5, #1 @ SC += 1 (plain add keeps the carry from adds for the bcc)
642	bcc Lnot_too_big
643
644	@ We get here if lr (the shifted divisor) overflowed while shifting.
645	@ This means that r3 has the high-order bit set.
646	@ Restore lr and subtract from r3.
647	mov r4, r4, lsl #4 @ T = 2^27 << 4 = 2^31
648	mov lr, lr, lsr #1 @ halve V again; the bit that overflowed is still missing
649	add lr, r4, lr @ V = 2^31 + V, which puts the lost top bit back
650	sub r5, r5, #1 @ uncount the doubling that overflowed
651	b Ldo_single_div
652
653	Lnot_too_big:
654	3: cmp lr, r3
655	bcc 2b @ V < R (unsigned): keep doubling
656	@ beq Ldo_single_div
657
658	/* NB: these are commented out in the V8-Sparc manual as well */
659	/* (I do not understand this) */
660	@ lr > r3: went too far: back up 1 step
661	@ srl lr, 1, lr
662	@ dec r5
663	@ do single-bit divide steps
664	@
665	@ We have to be careful here. We know that r3 >= lr, so we can do the
666	@ first divide step without thinking. BUT, the others are conditional,
667	@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
668	@ order bit set in the first step, just falling into the regular
669	@ division loop will mess up the first time around.
670	@ So we unroll slightly...
671	Ldo_single_div:
672	subs r5, r5, #1
673	blt Lend_regular_divide @ SC went negative: skip the single-bit steps
674	sub r3, r3, lr @ first step is unconditional: R -= V
675	mov r2, #1 @ Q = 1
676	b Lend_single_divloop
677	Lsingle_divloop: @ one single-bit step per pass: Q is shifted left one bit and V is halved
678	cmp r3, #0 @ N flag = sign of R; selects the pl pair or the mi pair below
679	mov r2, r2, lsl #1
680	mov lr, lr, lsr #1
681	@ r3 >= 0
682	subpl r3, r3, lr
683	addpl r2, r2, #1
684	@ r3 < 0
685	addmi r3, r3, lr
686	submi r2, r2, #1
687	Lend_single_divloop:
688	subs r5, r5, #1
689	bge Lsingle_divloop @ loop until SC goes negative
690	b Lend_regular_divide
691
692	1:
693	add ip, ip, #1 @ ITER += 1
694	Lnot_really_big:
695	mov lr, lr, lsl #4 @ V <<= 4
696	cmp lr, r3
697	bls 1b @ V <= R (unsigned): count one more pass and shift again
698	@
699	@ HOW CAN ip EVER BE -1 HERE ?????
700	@
701	cmn ip, #1 @ Z is set when ip == -1 (cmn sets flags from ip + 1)
702	beq Lgot_result
703
704	Ldivloop: @ each pass develops 4 quotient bits: four halve-V-then-add-or-subtract steps, then one odd digit in -15..+15 is added to Q
705	cmp r3, #0 @ set up for initial iteration
706	mov r2, r2, lsl #4 @ make room in Q for the 4 bits developed below
707	@ depth 1, accumulated bits 0
708	mov lr, lr, lsr #1
709	blt L.1.1015 @ taken when R < 0; the flags are still those of the cmp since plain mov does not set them
710	@ remainder is positive
711	subs r3, r3, lr
712	@ depth 2, accumulated bits 1
713	mov lr, lr, lsr #1
714	blt L.2.1016
715	@ remainder is positive
716	subs r3, r3, lr
717	@ depth 3, accumulated bits 3
718	mov lr, lr, lsr #1
719	blt L.3.1018
720	@ remainder is positive
721	subs r3, r3, lr
722	@ depth 4, accumulated bits 7
723	mov lr, lr, lsr #1
724	blt L.4.1022
725	@ remainder is positive
726	subs r3, r3, lr
727	add r2, r2, #15
728
729	b 9f
730
731	L.4.1022:
732	@ remainder is negative
733	adds r3, r3, lr
734	add r2, r2, #13
735	b 9f
736
737
738
739	L.3.1018:
740	@ remainder is negative
741	adds r3, r3, lr
742	@ depth 4, accumulated bits 5
743	mov lr, lr, lsr #1
744	blt L.4.1020
745	@ remainder is positive
746	subs r3, r3, lr
747	add r2, r2, #11
748
749	b 9f
750
751	L.4.1020:
752	@ remainder is negative
753	adds r3, r3, lr
754	add r2, r2, #9
755	b 9f
756
757
758
759
760	L.2.1016:
761	@ remainder is negative
762	adds r3, r3, lr
763	@ depth 3, accumulated bits 1
764	mov lr, lr, lsr #1
765	blt L.3.1016
766	@ remainder is positive
767	subs r3, r3, lr
768	@ depth 4, accumulated bits 3
769	mov lr, lr, lsr #1
770	blt L.4.1018
771	@ remainder is positive
772	subs r3, r3, lr
773	add r2, r2, #7
774
775	b 9f
776
777	L.4.1018:
778	@ remainder is negative
779	adds r3, r3, lr
780	add r2, r2, #5
781	b 9f
782
783
784
785	L.3.1016:
786	@ remainder is negative
787	adds r3, r3, lr
788	@ depth 4, accumulated bits 1
789	mov lr, lr, lsr #1
790	blt L.4.1016
791	@ remainder is positive
792	subs r3, r3, lr
793	add r2, r2, #3
794
795	b 9f
796
797	L.4.1016:
798	@ remainder is negative
799	adds r3, r3, lr
800	add r2, r2, #1
801	b 9f
802
803
804
805
806
807	L.1.1015:
808	@ remainder is negative
809	adds r3, r3, lr
810	@ depth 2, accumulated bits -1
811	mov lr, lr, lsr #1
812	blt L.2.1014
813	@ remainder is positive
814	subs r3, r3, lr
815	@ depth 3, accumulated bits -1
816	mov lr, lr, lsr #1
817	blt L.3.1014
818	@ remainder is positive
819	subs r3, r3, lr
820	@ depth 4, accumulated bits -1
821	mov lr, lr, lsr #1
822	blt L.4.1014
823	@ remainder is positive
824	subs r3, r3, lr
825	sub r2, r2, #1
826
827	b 9f
828
829	L.4.1014:
830	@ remainder is negative
831	adds r3, r3, lr
832	sub r2, r2, #3
833	b 9f
834
835
836
837	L.3.1014:
838	@ remainder is negative
839	adds r3, r3, lr
840	@ depth 4, accumulated bits -3
841	mov lr, lr, lsr #1
842	blt L.4.1012
843	@ remainder is positive
844	subs r3, r3, lr
845	sub r2, r2, #5
846
847	b 9f
848
849	L.4.1012:
850	@ remainder is negative
851	adds r3, r3, lr
852	sub r2, r2, #7
853	b 9f
854
855
856
857
858	L.2.1014:
859	@ remainder is negative
860	adds r3, r3, lr
861	@ depth 3, accumulated bits -3
862	mov lr, lr, lsr #1
863	blt L.3.1012
864	@ remainder is positive
865	subs r3, r3, lr
866	@ depth 4, accumulated bits -5
867	mov lr, lr, lsr #1
868	blt L.4.1010
869	@ remainder is positive
870	subs r3, r3, lr
871	sub r2, r2, #9
872
873	b 9f
874
875	L.4.1010:
876	@ remainder is negative
877	adds r3, r3, lr
878	sub r2, r2, #11
879	b 9f
880
881
882
883	L.3.1012:
884	@ remainder is negative
885	adds r3, r3, lr
886	@ depth 4, accumulated bits -7
887	mov lr, lr, lsr #1
888	blt L.4.1008
889	@ remainder is positive
890	subs r3, r3, lr
891	sub r2, r2, #13
892
893	b 9f
894
895	L.4.1008:
896	@ remainder is negative
897	adds r3, r3, lr
898	sub r2, r2, #15
899	b 9f
900
901
902
903
904
905	9: @ common exit of the 16 leaves above
906	Lend_regular_divide:
907	subs ip, ip, #1
908	bge Ldivloop @ run another 4-bit pass while ITER has not gone negative
909	cmp r3, #0
910	@ non-restoring fixup here (one instruction only!)
911	sublt r2, r2, #1 @ remainder ended negative: step the quotient down by one
912
913
914	Lgot_result:
915	@ check to see if answer should be < 0
916	cmp r6, #0 @ N = bit 31 of SIGN (the eor of the two original operands)
917	rsbmi r2, r2, #0 @ operand signs differed: negate the quotient
918
919	mov r0, r2 @ return Q
920	ldmia sp!, {r4, r5, r6, pc} @ restore r4-r6 and return by loading the saved lr into pc
921
922	Ldiv_zero:
923	@ Divide by zero trap. If it returns, return 0 (about as
924	@ wrong as possible, but that is what SunOS does...).
925	bl ___div0 @ bl overwrites lr; the return address was already pushed at entry
926	mov r0, #0
927	ldmia sp!, {r4, r5, r6, pc}
928
929	#endif /* L_divsi3 */
930
931	#ifdef L_umodsi3
932	@ ___umodsi3: unsigned 32-bit remainder. In: r0 = dividend, r1 = divisor. Out: r0 = remainder. Matches the m4 template in this file with OP=mod, S=false.
933	ip .req r12 @ ip is ITER: count of 4-bit divide passes still to run
934	sp .req r13
935	lr .req r14 @ lr is V: the shifted comparand (the return address is kept on the stack)
936	pc .req r15
937	.text
938	.globl ___umodsi3
939	.align 0
940	___umodsi3:
941	stmdb sp!, {r4, r5, lr} @ save r4 (T), r5 (SC) and the return address
942	@ Ready to divide. Compute size of quotient; scale comparand.
943	movs lr, r1 @ V = divisor; Z is set when the divisor is zero
944	mov r3, r0 @ R = dividend (plain mov keeps the flags from movs for the beq)
945	beq Ldiv_zero
946
947
948	cmp r3, lr @ if r1 exceeds r0, done
949	mov r2, #0 @ Q = 0
950	bcc Lgot_result @ (and algorithm fails otherwise)
951	mov r4, #(1 << (32 - 4 - 1)) @ T = 2^27
952	cmp r3, r4
953	mov ip, #0 @ ITER = 0
954	bcc Lnot_really_big @ R < 2^27 (unsigned): use the plain 4-bits-per-pass path
955
956	@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
957	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
958	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
959	@ Compute ITER in an unorthodox manner: know we need to shift V into
960	@ the top decade: so do not even bother to compare to R.
961	mov r5, #1 @ SC = 1
962	1: @ shift V left 4 bits at a time until V >= T, counting the shifts in ITER
963	cmp lr, r4
964	bcs 3f
965	mov lr, lr, lsl #4
966	add ip, ip, #1
967	b 1b
968
969	@ Now compute r5 (SC): double V, counting each doubling, while V is below R.
970	2: adds lr, lr, lr @ V = 2*V; carry is set when a bit falls out of bit 31
971	add r5, r5, #1 @ SC += 1 (plain add keeps the carry from adds for the bcc)
972	bcc Lnot_too_big
973
974	@ We get here if lr (the shifted divisor) overflowed while shifting.
975	@ This means that r3 has the high-order bit set.
976	@ Restore lr and subtract from r3.
977	mov r4, r4, lsl #4 @ T = 2^27 << 4 = 2^31
978	mov lr, lr, lsr #1 @ halve V again; the bit that overflowed is still missing
979	add lr, r4, lr @ V = 2^31 + V, which puts the lost top bit back
980	sub r5, r5, #1 @ uncount the doubling that overflowed
981	b Ldo_single_div
982
983	Lnot_too_big:
984	3: cmp lr, r3
985	bcc 2b @ V < R (unsigned): keep doubling
986	@ beq Ldo_single_div
987
988	/* NB: these are commented out in the V8-Sparc manual as well */
989	/* (I do not understand this) */
990	@ lr > r3: went too far: back up 1 step
991	@ srl lr, 1, lr
992	@ dec r5
993	@ do single-bit divide steps
994	@
995	@ We have to be careful here. We know that r3 >= lr, so we can do the
996	@ first divide step without thinking. BUT, the others are conditional,
997	@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
998	@ order bit set in the first step, just falling into the regular
999	@ division loop will mess up the first time around.
1000	@ So we unroll slightly...
1001	Ldo_single_div:
1002	subs r5, r5, #1
1003	blt Lend_regular_divide @ SC went negative: skip the single-bit steps
1004	sub r3, r3, lr @ first step is unconditional: R -= V
1005	mov r2, #1 @ Q = 1
1006	b Lend_single_divloop
1007	Lsingle_divloop: @ one single-bit step per pass: Q is shifted left one bit and V is halved
1008	cmp r3, #0 @ N flag = sign of R; selects the pl pair or the mi pair below
1009	mov r2, r2, lsl #1
1010	mov lr, lr, lsr #1
1011	@ r3 >= 0
1012	subpl r3, r3, lr
1013	addpl r2, r2, #1
1014	@ r3 < 0
1015	addmi r3, r3, lr
1016	submi r2, r2, #1
1017	Lend_single_divloop:
1018	subs r5, r5, #1
1019	bge Lsingle_divloop @ loop until SC goes negative
1020	b Lend_regular_divide
1021
1022	1:
1023	add ip, ip, #1 @ ITER += 1
1024	Lnot_really_big:
1025	mov lr, lr, lsl #4 @ V <<= 4
1026	cmp lr, r3
1027	bls 1b @ V <= R (unsigned): count one more pass and shift again
1028	@
1029	@ HOW CAN ip EVER BE -1 HERE ?????
1030	@
1031	cmn ip, #1 @ Z is set when ip == -1 (cmn sets flags from ip + 1)
1032	beq Lgot_result
1033
1034	Ldivloop: @ each pass develops 4 quotient bits: four halve-V-then-add-or-subtract steps, then one odd digit in -15..+15 is added to Q
1035	cmp r3, #0 @ set up for initial iteration
1036	mov r2, r2, lsl #4 @ make room in Q for the 4 bits developed below
1037	@ depth 1, accumulated bits 0
1038	mov lr, lr, lsr #1
1039	blt L.1.1015 @ taken when R < 0; the flags are still those of the cmp since plain mov does not set them
1040	@ remainder is positive
1041	subs r3, r3, lr
1042	@ depth 2, accumulated bits 1
1043	mov lr, lr, lsr #1
1044	blt L.2.1016
1045	@ remainder is positive
1046	subs r3, r3, lr
1047	@ depth 3, accumulated bits 3
1048	mov lr, lr, lsr #1
1049	blt L.3.1018
1050	@ remainder is positive
1051	subs r3, r3, lr
1052	@ depth 4, accumulated bits 7
1053	mov lr, lr, lsr #1
1054	blt L.4.1022
1055	@ remainder is positive
1056	subs r3, r3, lr
1057	add r2, r2, #15
1058
1059	b 9f
1060
1061	L.4.1022:
1062	@ remainder is negative
1063	adds r3, r3, lr
1064	add r2, r2, #13
1065	b 9f
1066
1067
1068
1069	L.3.1018:
1070	@ remainder is negative
1071	adds r3, r3, lr
1072	@ depth 4, accumulated bits 5
1073	mov lr, lr, lsr #1
1074	blt L.4.1020
1075	@ remainder is positive
1076	subs r3, r3, lr
1077	add r2, r2, #11
1078
1079	b 9f
1080
1081	L.4.1020:
1082	@ remainder is negative
1083	adds r3, r3, lr
1084	add r2, r2, #9
1085	b 9f
1086
1087
1088
1089
1090	L.2.1016:
1091	@ remainder is negative
1092	adds r3, r3, lr
1093	@ depth 3, accumulated bits 1
1094	mov lr, lr, lsr #1
1095	blt L.3.1016
1096	@ remainder is positive
1097	subs r3, r3, lr
1098	@ depth 4, accumulated bits 3
1099	mov lr, lr, lsr #1
1100	blt L.4.1018
1101	@ remainder is positive
1102	subs r3, r3, lr
1103	add r2, r2, #7
1104
1105	b 9f
1106
1107	L.4.1018:
1108	@ remainder is negative
1109	adds r3, r3, lr
1110	add r2, r2, #5
1111	b 9f
1112
1113
1114
1115	L.3.1016:
1116	@ remainder is negative
1117	adds r3, r3, lr
1118	@ depth 4, accumulated bits 1
1119	mov lr, lr, lsr #1
1120	blt L.4.1016
1121	@ remainder is positive
1122	subs r3, r3, lr
1123	add r2, r2, #3
1124
1125	b 9f
1126
1127	L.4.1016:
1128	@ remainder is negative
1129	adds r3, r3, lr
1130	add r2, r2, #1
1131	b 9f
1132
1133
1134
1135
1136
1137	L.1.1015:
1138	@ remainder is negative
1139	adds r3, r3, lr
1140	@ depth 2, accumulated bits -1
1141	mov lr, lr, lsr #1
1142	blt L.2.1014
1143	@ remainder is positive
1144	subs r3, r3, lr
1145	@ depth 3, accumulated bits -1
1146	mov lr, lr, lsr #1
1147	blt L.3.1014
1148	@ remainder is positive
1149	subs r3, r3, lr
1150	@ depth 4, accumulated bits -1
1151	mov lr, lr, lsr #1
1152	blt L.4.1014
1153	@ remainder is positive
1154	subs r3, r3, lr
1155	sub r2, r2, #1
1156
1157	b 9f
1158
1159	L.4.1014:
1160	@ remainder is negative
1161	adds r3, r3, lr
1162	sub r2, r2, #3
1163	b 9f
1164
1165
1166
1167	L.3.1014:
1168	@ remainder is negative
1169	adds r3, r3, lr
1170	@ depth 4, accumulated bits -3
1171	mov lr, lr, lsr #1
1172	blt L.4.1012
1173	@ remainder is positive
1174	subs r3, r3, lr
1175	sub r2, r2, #5
1176
1177	b 9f
1178
1179	L.4.1012:
1180	@ remainder is negative
1181	adds r3, r3, lr
1182	sub r2, r2, #7
1183	b 9f
1184
1185
1186
1187
1188	L.2.1014:
1189	@ remainder is negative
1190	adds r3, r3, lr
1191	@ depth 3, accumulated bits -3
1192	mov lr, lr, lsr #1
1193	blt L.3.1012
1194	@ remainder is positive
1195	subs r3, r3, lr
1196	@ depth 4, accumulated bits -5
1197	mov lr, lr, lsr #1
1198	blt L.4.1010
1199	@ remainder is positive
1200	subs r3, r3, lr
1201	sub r2, r2, #9
1202
1203	b 9f
1204
1205	L.4.1010:
1206	@ remainder is negative
1207	adds r3, r3, lr
1208	sub r2, r2, #11
1209	b 9f
1210
1211
1212
1213	L.3.1012:
1214	@ remainder is negative
1215	adds r3, r3, lr
1216	@ depth 4, accumulated bits -7
1217	mov lr, lr, lsr #1
1218	blt L.4.1008
1219	@ remainder is positive
1220	subs r3, r3, lr
1221	sub r2, r2, #13
1222
1223	b 9f
1224
1225	L.4.1008:
1226	@ remainder is negative
1227	adds r3, r3, lr
1228	sub r2, r2, #15
1229	b 9f
1230
1231
1232
1233
1234
1235	9: @ common exit of the 16 leaves above
1236	Lend_regular_divide:
1237	subs ip, ip, #1
1238	bge Ldivloop @ run another 4-bit pass while ITER has not gone negative
1239	cmp r3, #0
1240	@ non-restoring fixup here (one instruction only!)
1241	addlt r3, r1, r3 @ remainder ended negative: add the divisor (still in r1) back once
1242
1243
1244	Lgot_result:
1245
1246	mov r0, r3 @ return R
1247	ldmia sp!, {r4, r5, pc} @ restore r4, r5 and return by loading the saved lr into pc
1248
1249	Ldiv_zero:
1250	@ Divide by zero trap. If it returns, return 0 (about as
1251	@ wrong as possible, but that is what SunOS does...).
1252	bl ___div0 @ bl overwrites lr; the return address was already pushed at entry
1253	mov r0, #0
1254	ldmia sp!, {r4, r5, pc}
1255
1256	#endif /* L_umodsi3 */
1257
1258	#ifdef L_modsi3
1259
1260	ip .req r12
1261	sp .req r13
1262	lr .req r14
1263	pc .req r15
1264	.text
1265	.globl ___modsi3
1266	.align 0
1267	___modsi3:
1268	stmdb sp!, {r4, r5, r6, lr}
1269	@ compute sign of result; if neither is negative, no problem
1270	mov r6, r0
1271	cmp r1, #0
1272	rsbmi r1, r1, #0
1273	beq Ldiv_zero
1274	mov lr, r1
1275	movs r3, r0
1276	rsbmi r3, r3, #0 @ make dividend nonnegative
1277
1278
1279	cmp r3, lr @ if r1 exceeds r0, done
1280	mov r2, #0
1281	bcc Lgot_result @ (and algorithm fails otherwise)
1282	mov r4, #(1 << (32 - 4 - 1))
1283	cmp r3, r4
1284	mov ip, #0
1285	bcc Lnot_really_big
1286
1287	@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
1288	@ as our usual N-at-a-shot divide step will cause overflow and havoc.
1289	@ The number of bits in the result here is N*ITER+SC, where SC <= N.
1290	@ Compute ITER in an unorthodox manner: know we need to shift V into
1291	@ the top decade: so do not even bother to compare to R.
1292	mov r5, #1
1293	1:
1294	cmp lr, r4
1295	bcs 3f
1296	mov lr, lr, lsl #4
1297	add ip, ip, #1
1298	b 1b
1299
1300	@ Now compute r5.
1301	2: adds lr, lr, lr
1302	add r5, r5, #1
1303	bcc Lnot_too_big
1304
1305	@ We get here if the r1 overflowed while shifting.
1306	@ This means that r3 has the high-order bit set.
1307	@ Restore lr and subtract from r3.
1308	mov r4, r4, lsl #4
1309	mov lr, lr, lsr #1
1310	add lr, r4, lr
1311	sub r5, r5, #1
1312	b Ldo_single_div
1313
1314	Lnot_too_big:
1315	3: cmp lr, r3
1316	bcc 2b
1317	@ beq Ldo_single_div
1318
1319	/* NB: these are commented out in the V8-Sparc manual as well */
1320	/* (I do not understand this) */
1321	@ lr > r3: went too far: back up 1 step
1322	@ srl lr, 1, lr
1323	@ dec r5
1324	@ do single-bit divide steps
1325	@
1326	@ We have to be careful here. We know that r3 >= lr, so we can do the
1327	@ first divide step without thinking. BUT, the others are conditional,
1328	@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
1329	@ order bit set in the first step, just falling into the regular
1330	@ division loop will mess up the first time around.
1331	@ So we unroll slightly...
1332	Ldo_single_div:
1333	subs r5, r5, #1
1334	blt Lend_regular_divide
1335	sub r3, r3, lr
1336	mov r2, #1
1337	b Lend_single_divloop
1338	Lsingle_divloop:
1339	cmp r3, #0
1340	mov r2, r2, lsl #1
1341	mov lr, lr, lsr #1
1342	@ r3 >= 0
1343	subpl r3, r3, lr
1344	addpl r2, r2, #1
1345	@ r3 < 0
1346	addmi r3, r3, lr
1347	submi r2, r2, #1
1348	Lend_single_divloop:
1349	subs r5, r5, #1
1350	bge Lsingle_divloop
1351	b Lend_regular_divide
1352
1353	1:
1354	add ip, ip, #1
1355	Lnot_really_big:
1356	mov lr, lr, lsl #4
1357	cmp lr, r3
1358	bls 1b
1359	@
1360	@ HOW CAN ip EVER BE -1 HERE ?????
1361	@
1362	cmn ip, #1
1363	beq Lgot_result
1364
1365	Ldivloop:
1366	cmp r3, #0 @ set up for initial iteration
1367	mov r2, r2, lsl #4
1368	@ depth 1, accumulated bits 0
1369	mov lr, lr, lsr #1
1370	blt L.1.1015
1371	@ remainder is positive
1372	subs r3, r3, lr
1373	@ depth 2, accumulated bits 1
1374	mov lr, lr, lsr #1
1375	blt L.2.1016
1376	@ remainder is positive
1377	subs r3, r3, lr
1378	@ depth 3, accumulated bits 3
1379	mov lr, lr, lsr #1
1380	blt L.3.1018
1381	@ remainder is positive
1382	subs r3, r3, lr
1383	@ depth 4, accumulated bits 7
1384	mov lr, lr, lsr #1
1385	blt L.4.1022
1386	@ remainder is positive
1387	subs r3, r3, lr
1388	add r2, r2, #15
1389
1390	b 9f
1391
1392	L.4.1022:
1393	@ remainder is negative
1394	adds r3, r3, lr
1395	add r2, r2, #13
1396	b 9f
1397
1398
1399
1400	L.3.1018:
1401	@ remainder is negative
1402	adds r3, r3, lr
1403	@ depth 4, accumulated bits 5
1404	mov lr, lr, lsr #1
1405	blt L.4.1020
1406	@ remainder is positive
1407	subs r3, r3, lr
1408	add r2, r2, #11
1409
1410	b 9f
1411
1412	L.4.1020:
1413	@ remainder is negative
1414	adds r3, r3, lr
1415	add r2, r2, #9
1416	b 9f
1417
1418
1419
1420
1421	L.2.1016:
1422	@ remainder is negative
1423	adds r3, r3, lr
1424	@ depth 3, accumulated bits 1
1425	mov lr, lr, lsr #1
1426	blt L.3.1016
1427	@ remainder is positive
1428	subs r3, r3, lr
1429	@ depth 4, accumulated bits 3
1430	mov lr, lr, lsr #1
1431	blt L.4.1018
1432	@ remainder is positive
1433	subs r3, r3, lr
1434	add r2, r2, #7
1435
1436	b 9f
1437
1438	L.4.1018:
1439	@ remainder is negative
1440	adds r3, r3, lr
1441	add r2, r2, #5
1442	b 9f
1443
1444
1445
1446	L.3.1016:
1447	@ remainder is negative
1448	adds r3, r3, lr
1449	@ depth 4, accumulated bits 1
1450	mov lr, lr, lsr #1
1451	blt L.4.1016
1452	@ remainder is positive
1453	subs r3, r3, lr
1454	add r2, r2, #3
1455
1456	b 9f
1457
1458	L.4.1016:
1459	@ remainder is negative
1460	adds r3, r3, lr
1461	add r2, r2, #1
1462	b 9f
1463
1464
1465
1466
1467
1468	L.1.1015:
1469	@ remainder is negative
1470	adds r3, r3, lr
1471	@ depth 2, accumulated bits -1
1472	mov lr, lr, lsr #1
1473	blt L.2.1014
1474	@ remainder is positive
1475	subs r3, r3, lr
1476	@ depth 3, accumulated bits -1
1477	mov lr, lr, lsr #1
1478	blt L.3.1014
1479	@ remainder is positive
1480	subs r3, r3, lr
1481	@ depth 4, accumulated bits -1
1482	mov lr, lr, lsr #1
1483	blt L.4.1014
1484	@ remainder is positive
1485	subs r3, r3, lr
1486	sub r2, r2, #1
1487
1488	b 9f
1489
1490	L.4.1014:
1491	@ remainder is negative
1492	adds r3, r3, lr
1493	sub r2, r2, #3
1494	b 9f
1495
1496
1497
1498	L.3.1014:
1499	@ remainder is negative
1500	adds r3, r3, lr
1501	@ depth 4, accumulated bits -3
1502	mov lr, lr, lsr #1
1503	blt L.4.1012
1504	@ remainder is positive
1505	subs r3, r3, lr
1506	sub r2, r2, #5
1507
1508	b 9f
1509
1510	L.4.1012:
1511	@ remainder is negative
1512	adds r3, r3, lr
1513	sub r2, r2, #7
1514	b 9f
1515
1516
1517
1518
1519	L.2.1014:
1520	@ remainder is negative
1521	adds r3, r3, lr
1522	@ depth 3, accumulated bits -3
1523	mov lr, lr, lsr #1
1524	blt L.3.1012
1525	@ remainder is positive
1526	subs r3, r3, lr
1527	@ depth 4, accumulated bits -5
1528	mov lr, lr, lsr #1
1529	blt L.4.1010
1530	@ remainder is positive
1531	subs r3, r3, lr
1532	sub r2, r2, #9
1533
1534	b 9f
1535
1536	L.4.1010:
1537	@ remainder is negative
1538	adds r3, r3, lr
1539	sub r2, r2, #11
1540	b 9f
1541
1542
1543
1544	L.3.1012:
1545	@ remainder is negative
1546	adds r3, r3, lr
1547	@ depth 4, accumulated bits -7
1548	mov lr, lr, lsr #1
1549	blt L.4.1008
1550	@ remainder is positive
1551	subs r3, r3, lr
1552	sub r2, r2, #13
1553
1554	b 9f
1555
1556	L.4.1008:
1557	@ remainder is negative
1558	adds r3, r3, lr
1559	sub r2, r2, #15
1560	b 9f
1561
1562
1563
1564
1565
1566	9:
1567	Lend_regular_divide:
1568	subs ip, ip, #1
1569	bge Ldivloop
1570	cmp r3, #0
1571	@ non-restoring fixup here (one instruction only!)
1572	addlt r3, r1, r3
1573
1574
1575	Lgot_result:
1576	@ check to see if answer should be < 0
1577	cmp r6, #0
1578	rsbmi r3, r3, #0
1579
1580	mov r0, r3
1581	ldmia sp!, {r4, r5, r6, pc}
1582
1583	Ldiv_zero:
1584	@ Divide by zero trap. If it returns, return 0 (about as
1585	@ wrong as possible, but that is what SunOS does...).
1586	bl ___div0
1587	mov r0, #0
1588	ldmia sp!, {r4, r5, r6, pc}
1589
1590	#endif /* L_modsi3 */
1591
1592	#ifdef L_dvmd_tls
1593
1594	.globl ___div0 @ exported symbol; the Ldiv_zero path in this file reaches it with bl ___div0
1595	.align 0 @ NOTE(review): GNU as documents .align 0 on ARM as word alignment; confirm for the assembler in use
1596	___div0: @ divide-by-zero hook: this version takes no action
1597	mov pc, lr @ return to the caller at once; no register other than pc is written
1598
1599	#endif /* L_dvmd_tls */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: