/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.

Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003
Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this file; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

/* You have to define the following before including this file:

   UWtype -- An unsigned type, default type for operations (typically a "word")
   UHWtype -- An unsigned type, at least half the size of UWtype.
   UDWtype -- An unsigned type, at least twice as large as UWtype.
   W_TYPE_SIZE -- size in bits of UWtype

   SItype, USItype -- Signed and unsigned 32 bit types.
   DItype, UDItype -- Signed and unsigned 64 bit types.

   On a 32 bit machine UWtype should typically be USItype;
   on a 64 bit machine, UWtype should typically be UDItype.
*/
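
/* For example, a plain 32-bit configuration might look like the following
   (illustrative only; in GMP the real definitions come from gmp-impl.h and
   the configure machinery, not from this sketch):

       typedef int           SItype  __attribute__ ((mode (SI)));
       typedef unsigned int  USItype __attribute__ ((mode (SI)));
       typedef int           DItype  __attribute__ ((mode (DI)));
       typedef unsigned int  UDItype __attribute__ ((mode (DI)));
       #define UWtype       USItype
       #define UHWtype      unsigned short
       #define UDWtype      UDItype
       #define W_TYPE_SIZE  32
*/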

#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
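/* For example, with W_TYPE_SIZE == 32: __BITS4 is 8, __ll_B is 0x10000,
   and a word t = 0x12345678 splits into __ll_highpart (t) == 0x1234 and
   __ll_lowpart (t) == 0x5678.  */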

/* This is used to make sure no undesirable sharing between different libraries
   that use this file takes place.  */
#ifndef __MPN
#define __MPN(x) __##x
#endif

#ifndef _PROTO
#if (__STDC__-0) || defined (__cplusplus)
#define _PROTO(x) x
#else
#define _PROTO(x) ()
#endif
#endif

/* Define auxiliary asm macros.

   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
   word product in HIGH_PROD and LOW_PROD.

   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
   UDWtype product.  This is just a variant of umul_ppmm.

   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator) divides a UDWtype, composed by the UWtype integers
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
   than DENOMINATOR for correct operation.  If, in addition, the most
   significant bit of DENOMINATOR is required to be 1, then the pre-processor
   symbol UDIV_NEEDS_NORMALIZATION is defined to 1.

   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
   is rounded towards 0.

   5) count_leading_zeros(count, x) counts the number of zero-bits from the
   msb to the first non-zero bit in the UWtype X.  This is the number of
   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.

   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
   from the least significant end.

   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
   (i.e. carry out) is not stored anywhere, and is lost.

   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
   and is lost.

   If any of these macros are left undefined for a particular CPU,
   C macros are used (a sketch of two of these fallbacks appears right
   after this comment).


   Notes:

   For add_ssaaaa the two high and two low addends can both commute, but
   unfortunately gcc only supports one "%" commutative in each asm block.
   This has always been so but is only documented in recent versions
   (e.g. pre-release 3.3).  Having two or more "%"s can cause an internal
   compiler error in certain rare circumstances.

   Apparently it was only the last "%" that was ever actually respected, so
   the code has been updated to leave just that.  Clearly there's a free
   choice whether high or low should get it, if there's a reason to favour
   one over the other.  Also obviously when the constraints on the two
   operands are identical there's no benefit to the reloader in any "%" at
   all.

*/
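
/* For reference, two of the generic C fallbacks mentioned above look
   essentially like the sketch below (the active definitions live later in
   the file, outside this excerpt, and are only compiled when no asm version
   matched).  add_ssaaaa propagates the carry out of the low words with a
   comparison; umul_ppmm splits each operand into half-words using
   __ll_highpart and __ll_lowpart and accumulates the four partial
   products.  Guarded with #if 0 since it is illustrative only.  */
#if 0
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) + (bl); /* low words; carry out iff the sum wrapped */ \
    (sh) = (ah) + (bh) + (__x < (al)); \
    (sl) = __x; \
  } while (0)

#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    __ul = __ll_lowpart (u); \
    __uh = __ll_highpart (u); \
    __vl = __ll_lowpart (v); \
    __vh = __ll_highpart (v); \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
    __x1 += __ll_highpart (__x0); /* this can't give carry */ \
    __x1 += __x2;                 /* but this indeed can */ \
    if (__x1 < __x2)              /* did we get it? */ \
      __x3 += __ll_B;             /* yes, add it in the proper pos. */ \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
  } while (0)
#endif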

/* The CPUs come in alphabetical order below.

   Please add support for more CPUs here, or improve the current support
   for the CPUs below!  */

/* FIXME: The macros using external routines like __MPN(count_leading_zeros)
   don't need to be under !NO_ASM */
#if ! defined (NO_ASM)

#if defined (__alpha) && W_TYPE_SIZE == 64
/* Most alpha-based machines, except Cray systems. */
#if defined (__GNUC__)
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("umulh %r1,%2,%0" \
             : "=r" (ph) \
             : "%rJ" (__m0), "rI" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 18
#else /* ! __GNUC__ */
#include <machine/builtins.h>
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    (ph) = __UMULH (__m0, __m1); \
    (pl) = __m0 * __m1; \
  } while (0)
#endif
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di; \
    __di = __MPN(invert_limb) (d); \
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
  } while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
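/* The udiv_qrnnd above (and the same pattern for several CPUs below) relies
   on two helpers defined elsewhere in GMP: __MPN(invert_limb) computes an
   approximate reciprocal di of a normalized divisor d (most significant bit
   set), and udiv_qrnnd_preinv then replaces the hardware divide with a
   multiply plus a couple of adjustment steps.  A sketch of one well-known
   formulation of the idea, with B = 2^W_TYPE_SIZE and
   di = floor ((B*B - 1) / d) - B (illustrative only; not necessarily the
   exact definition in gmp-impl.h):  */
#if 0
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
  do { \
    UWtype __qh, __ql, __r; \
    umul_ppmm (__qh, __ql, (nh), (di)); \
    add_ssaaaa (__qh, __ql, __qh, __ql, (nh) + 1, (nl)); \
    __r = (nl) - __qh * (d);    /* candidate remainder, mod B */ \
    if (__r > __ql)             /* quotient estimate one too large */ \
      { \
        __qh--; \
        __r += (d); \
      } \
    if (__r >= (d))             /* rare: estimate one too small */ \
      { \
        __qh++; \
        __r -= (d); \
      } \
    (q) = __qh; \
    (r) = __r; \
  } while (0)
#endif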
/* clz_tab is required by mpn/alpha/cntlz.asm, and that file is built for
   all alphas, even though ev67 and ev68 don't need it. */
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#if defined (__GNUC__) && (HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68)
#define count_leading_zeros(COUNT,X) \
  __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
#define count_trailing_zeros(COUNT,X) \
  __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
#else /* ! (ev67 || ev68) */
#ifndef LONGLONG_STANDALONE
#if HAVE_ATTRIBUTE_CONST
long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const));
#else
long __MPN(count_leading_zeros) _PROTO ((UDItype));
#endif
#define count_leading_zeros(count, x) \
  ((count) = __MPN(count_leading_zeros) (x))
#endif /* LONGLONG_STANDALONE */
#endif /* ! (ev67 || ev68) */
#endif /* __alpha */

#if defined (_CRAY) && W_TYPE_SIZE == 64
#include <intrinsics.h>
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 220
long __MPN(count_leading_zeros) _PROTO ((UDItype));
#define count_leading_zeros(count, x) \
  ((count) = _leadz ((UWtype) (x)))
#if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    (ph) = _int_mult_upper (__m0, __m1); \
    (pl) = __m0 * __m1; \
  } while (0)
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di; \
    __di = __MPN(invert_limb) (d); \
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
  } while (0)
#endif /* LONGLONG_STANDALONE */
#endif /* _CRAYIEEE */
#endif /* _CRAY */

#if defined (__hppa) && W_TYPE_SIZE == 64
/* These macros are for ABI=2.0w.  In ABI=2.0n they can't be used, since GCC
   (3.2) puts long long into two adjacent 32-bit registers.  Presumably this
   is just a case of no direct support for 2.0n but treating it like 1.0. */
#if defined (__GNUC__) && ! defined (_LONG_LONG_LIMB)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\tadd,dc %2,%3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsub,db %2,%3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
#endif
/* We put the result pointer parameter last here, since it makes passing
   of the other parameters more efficient. */
#ifndef LONGLONG_STANDALONE
#define umul_ppmm(wh, wl, u, v) \
  do { \
    UWtype __p0; \
    (wh) = __MPN(umul_ppmm) (u, v, &__p0); \
    (wl) = __p0; \
  } while (0)
extern UWtype __MPN(umul_ppmm) _PROTO ((UWtype, UWtype, UWtype *));
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r; \
    (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \
    (r) = __r; \
  } while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype, UWtype, UWtype, UWtype *));
#define UMUL_TIME 8
#define UDIV_TIME 60
#endif /* LONGLONG_STANDALONE */
#endif /* hppa */

#if defined (__ia64) && W_TYPE_SIZE == 64
#if defined (__GNUC__)
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("xma.hu %0 = %1, %2, f0" \
             : "=f" (ph) \
             : "f" (__m0), "f" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 14
#define count_leading_zeros(count, x) \
  do { \
    UWtype _x = (x), _y, _a, _c; \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
    _c = (_a - 1) << 3; \
    _x >>= _c; \
    if (_x >= 1 << 4) \
      _x >>= 4, _c += 4; \
    if (_x >= 1 << 2) \
      _x >>= 2, _c += 2; \
    _c += _x >> 1; \
    (count) = W_TYPE_SIZE - 1 - _c; \
  } while (0)
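/* The count_leading_zeros sequence above works a byte at a time first:
   mux1 reverses the bytes of _x, and czx1.l applied to -_y | _y yields
   the position of the highest nonzero byte of _x.  After shifting that
   byte down to the bottom, the remaining steps binary-search the top bit
   within it, leaving _c as the bit index of the msb, so the result is
   W_TYPE_SIZE - 1 - _c.  */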
#endif
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di; \
    __di = __MPN(invert_limb) (d); \
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
  } while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#endif
#define UDIV_TIME 220
#endif


#if defined (__GNUC__)

/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1.  Use cpp to avoid major code duplication. */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */

#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
           : "=r" (sh), "=&r" (sl) \
           : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
           : "=r" (sh), "=&r" (sl) \
           : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("multiplu %0,%1,%2" \
             : "=r" (xl) \
             : "r" (__m0), "r" (__m1)); \
    __asm__ ("multmu %0,%1,%2" \
             : "=r" (xh) \
             : "r" (__m0), "r" (__m1)); \
  } while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("dividu %0,%3,%4" \
           : "=r" (q), "=q" (r) \
           : "1" (n1), "r" (n0), "r" (d))
#define count_leading_zeros(count, x) \
  __asm__ ("clz %0,%1" \
           : "=r" (count) \
           : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __a29k__ */

#if defined (__arc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
#endif

#if defined (__arm__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
           : "=r" (sh), "=&r" (sl) \
           : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (al)) \
      { \
        if (__builtin_constant_p (ah)) \
          __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
        else \
          __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
                   : "=r" (sh), "=&r" (sl) \
                   : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      } \
    else if (__builtin_constant_p (ah)) \
      { \
        if (__builtin_constant_p (bl)) \
          __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
        else \
          __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      } \
    else if (__builtin_constant_p (bl)) \
      { \
        if (__builtin_constant_p (bh)) \
          __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
                   : "=r" (sh), "=&r" (sl) \
                   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
        else \
          __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
      } \
    else /* only bh might be a constant */ \
      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
  } while (0)
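/* The __builtin_constant_p tests above route whichever operands are
   compile-time constants into the "rI" immediate slots: plain subs/sbc
   only accept an immediate as the second source operand, while the
   reverse forms rsbs/rsc accept the minuend as the immediate.  */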
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#define UMUL_TIME 5
#define smul_ppmm(xh, xl, a, b) \
  __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di; \
    __di = __MPN(invert_limb) (d); \
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
  } while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 70
#endif /* LONGLONG_STANDALONE */
#else
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("%@ Inlined umul_ppmm\n" \
           " mov %|r0, %2, lsr #16\n" \
           " mov %|r2, %3, lsr #16\n" \
           " bic %|r1, %2, %|r0, lsl #16\n" \
           " bic %|r2, %3, %|r2, lsl #16\n" \
           " mul %1, %|r1, %|r2\n" \
           " mul %|r2, %|r0, %|r2\n" \
           " mul %|r1, %0, %|r1\n" \
           " mul %0, %|r0, %0\n" \
           " adds %|r1, %|r2, %|r1\n" \
           " addcs %0, %0, #65536\n" \
           " adds %1, %1, %|r1, lsl #16\n" \
           " adc %0, %0, %|r1, lsr #16" \
           : "=&r" (xh), "=r" (xl) \
           : "r" (a), "r" (b) \
           : "r0", "r1", "r2")
#define UMUL_TIME 20
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r; \
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#define UDIV_TIME 200
#endif /* LONGLONG_STANDALONE */
#endif
#endif /* __arm__ */

#if defined (__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __x; \
  __asm__ ("mulwux %2,%0" \
           : "=r" (__x.__ll) \
           : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define smul_ppmm(w1, w0, u, v) \
  ({union {DItype __ll; \
           struct {SItype __l, __h;} __i; \
          } __x; \
  __asm__ ("mulwx %2,%0" \
           : "=r" (__x.__ll) \
           : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w; \
  __asm__ ("mulwux %2,%0" \
           : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
  __w; })
#endif /* __clipper__ */

/* Fujitsu vector computers. */
#if defined (__uxp__) && W_TYPE_SIZE == 32
#define umul_ppmm(ph, pl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
    (ph) = __x.__i.__h; \
    (pl) = __x.__i.__l; \
  } while (0)
#define smul_ppmm(ph, pl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
    (ph) = __x.__i.__h; \
    (pl) = __x.__i.__l; \
  } while (0)
#endif

#if defined (__gmicro__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.w %5,%1\n\taddx %3,%0" \
           : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
           : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("mulx %3,%0,%1" \
           : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
           : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
#define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divx %4,%0,%1" \
           : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
           : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
#define count_leading_zeros(count, x) \
  __asm__ ("bsch/1 %1,%0" \
           : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
#endif

#if defined (__hppa) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
#if defined (_PA_RISC1_1)
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
    (wh) = __x.__i.__h; \
    (wl) = __x.__i.__l; \
  } while (0)
#define UMUL_TIME 8
#define UDIV_TIME 60
#else
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r; \
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#endif /* LONGLONG_STANDALONE */
#define count_leading_zeros(count, x) \
  do { \
    USItype __tmp; \
    __asm__ ( \
      "ldi 1,%0\n" \
      " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
      " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
      " ldo 16(%0),%0 ; Yes. Perform add.\n" \
      " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
      " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
      " ldo 8(%0),%0 ; Yes. Perform add.\n" \
      " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
      " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
      " ldo 4(%0),%0 ; Yes. Perform add.\n" \
      " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
      " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
      " ldo 2(%0),%0 ; Yes. Perform add.\n" \
      " extru %1,30,1,%1 ; Extract bit 1.\n" \
      " sub %0,%1,%0 ; Subtract it.\n" \
      : "=r" (count), "=r" (__tmp) : "1" (x)); \
  } while (0)
#endif /* hppa */

#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("lr %N0,%1\n\tmr %0,%2" \
             : "=&r" (__x.__ll) \
             : "r" (m0), "r" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __x.__i.__h = n1; __x.__i.__l = n0; \
    __asm__ ("dr %0,%2" \
             : "=r" (__x.__ll) \
             : "0" (__x.__ll), "r" (d)); \
    (q) = __x.__i.__l; (r) = __x.__i.__h; \
  } while (0)
#endif

#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl %5,%1\n\tadcl %3,%0" \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mull %3" \
           : "=a" (w0), "=d" (w1) \
           : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */ \
  __asm__ ("divl %4" /* stringification in K&R C */ \
           : "=a" (q), "=d" (r) \
           : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))

/* P5 bsrl takes between 10 and 72 cycles depending where the most
   significant 1 bit is, hence the use of the alternatives below.  bsfl is
   slow too, between 18 and 42 depending where the least significant 1 bit
   is.  The faster count_leading_zeros are pressed into service via the
   generic count_trailing_zeros at the end of the file. */
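
/* For reference, the generic count_trailing_zeros mentioned above (its
   active definition is at the end of the file, outside this excerpt)
   isolates the lowest set bit with x & -x and reuses count_leading_zeros
   on the result, essentially as follows (illustrative only):  */
#if 0
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    UWtype __ctz_c; \
    ASSERT (__ctz_x != 0); \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
    (count) = W_TYPE_SIZE - 1 - __ctz_c; \
  } while (0)
#endif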

#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium

/* The following should be a fixed 14 cycles or so.  Some scheduling
   opportunities should be available between the float load/store too.  This
   is used (with "n&-n" to get trailing zeros) in gcc 3 for __builtin_ffs
   and is apparently suggested by the Intel optimizing manual (don't know
   exactly where).  gcc 2.95 or later is best for this, so that the "double"
   is correctly aligned on the stack. */

#define count_leading_zeros(c,n) \
  do { \
    union { \
      double d; \
      unsigned a[2]; \
    } __u; \
    ASSERT ((n) != 0); \
    __u.d = (UWtype) (n); \
    (c) = 0x3FF + 31 - (__u.a[1] >> 20); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 (0x3FF + 31)
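/* Worked example of the float trick: converting n to double produces a
   biased exponent of 0x3FF plus the bit index of n's msb in bits 20..30
   of the high word (__u.a[1] on little-endian x86).  For n == 1 the
   field is 0x3FF, giving c == 31; for n == 0x80000000 it is 0x3FF + 31,
   giving c == 0.  */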

#else /* ! pentium */
#if HAVE_HOST_CPU_pentiummmx

/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
   cache miss reading from __clz_tab.  It's favoured over the float above so
   as to avoid mixing MMX and x87, since the penalty for switching between
   the two is about 100 cycles.

   The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
   16, -1 for 8, or 0 otherwise.  This could be written equivalently as
   follows, but as of gcc 2.95.2 it results in conditional jumps.

       __shift = -(__n < 0x1000000);
       __shift -= (__n < 0x10000);
       __shift -= (__n < 0x100);

   The middle two sbbl and cmpl's pair, and with luck something gcc
   generates might pair with the first cmpl and the last sbbl.  The "32+1"
   constant could be folded into __clz_tab[], but it doesn't seem worth
   making a different table just for that. */

#define count_leading_zeros(c,n) \
  do { \
    USItype __n = (n); \
    USItype __shift; \
    __asm__ ("cmpl $0x1000000, %1\n" \
             "sbbl %0, %0\n" \
             "cmpl $0x10000, %1\n" \
             "sbbl $0, %0\n" \
             "cmpl $0x100, %1\n" \
             "sbbl $0, %0\n" \
             : "=&r" (__shift) : "r" (__n)); \
    __shift = __shift*8 + 24 + 1; \
    (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift]; \
  } while (0)

#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#define COUNT_LEADING_ZEROS_0 31 /* n==0 indistinguishable from n==1 */

#else /* !pentiummmx */
/* On P6, gcc prior to 3.0 generates a partial register stall for
   __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
   being 1 code byte smaller.  "31-__cbtmp" is a workaround, probably at the
   cost of one extra instruction.  Do this for "i386" too, since that means
   generic x86. */
#if __GNUC__ < 3 \
    && (HAVE_HOST_CPU_i386 \
        || HAVE_HOST_CPU_i686 \
        || HAVE_HOST_CPU_pentiumpro \
        || HAVE_HOST_CPU_pentium2 \
        || HAVE_HOST_CPU_pentium3)
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    ASSERT ((x) != 0); \
    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
    (count) = 31 - __cbtmp; \
  } while (0)
#else
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    ASSERT ((x) != 0); \
    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
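/* bsrl leaves the bit index of the most significant set bit in its
   destination; since that index is in the range 0..31, "__cbtmp ^ 31"
   computes the same value as "31 - __cbtmp".  */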
#endif

#define count_trailing_zeros(count, x) \
  do { \
    ASSERT ((x) != 0); \
    __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))); \
  } while (0)
#endif /* ! pentiummmx */
#endif /* ! pentium */

#ifndef UMUL_TIME
#define UMUL_TIME 10
#endif
#ifndef UDIV_TIME
#define UDIV_TIME 40
#endif
#endif /* 80x86 */

#if defined (__x86_64__) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addq %5,%1\n\tadcq %3,%0" \
           : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \
           : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \
             "%1" ((UDItype)(al)), "g" ((UDItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subq %5,%1\n\tsbbq %3,%0" \
           : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \
           : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \
             "1" ((UDItype)(al)), "g" ((UDItype)(bl)))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulq %3" \
           : "=a" (w0), "=d" (w1) \
           : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */ \
  __asm__ ("divq %4" /* stringification in K&R C */ \
           : "=a" (q), "=d" (r) \
           : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
#define count_leading_zeros(count, x) \
  do { \
    UDItype __cbtmp; \
    ASSERT ((x) != 0); \
    __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
    (count) = __cbtmp ^ 63; \
  } while (0)
/* bsfq destination must be a 64-bit register, "%q0" forces this in case
   count is only an int. */
#define count_trailing_zeros(count, x) \
  do { \
    ASSERT ((x) != 0); \
    __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \
  } while (0)
#endif /* x86_64 */

#if defined (__i860__) && W_TYPE_SIZE == 32
#define rshift_rhlc(r,h,l,c) \
  __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \
           : "=r" (r) : "r" (h), "r" (l), "rn" (c))
#endif /* i860 */

#if defined (__i960__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __x; \
  __asm__ ("emul %2,%1,%0" \
           : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w; \
  __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \
  __w; })
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __nn, __rq; \
    __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
    __asm__ ("ediv %d,%n,%0" \
             : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
    (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
  } while (0)
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
#if defined (__i960mx) /* what is the proper symbol to test??? */
#define rshift_rhlc(r,h,l,c) \
  do { \
    union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __nn; \
    __nn.__i.__h = (h); __nn.__i.__l = (l); \
    __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
  } while (0)
#endif /* i960mx */
#endif /* i960 */

#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
     || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
     || defined (__mc5307__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
           : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
           : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */
#if defined (__mc68020__) || defined(mc68020) \
    || defined (__mc68030__) || defined (mc68030) \
    || defined (__mc68040__) || defined (mc68040) \
    || defined (__mcpu32__) || defined (mcpu32) \
    || defined (__NeXT__)
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
           : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
           : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
           : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
           : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
           : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
           : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
#else /* for other 68k family members use 16x16->32 multiplication */
#define umul_ppmm(xh, xl, a, b) \
  do { USItype __umul_tmp1, __umul_tmp2; \
    __asm__ ("| Inlined umul_ppmm\n" \
             " move%.l %5,%3\n" \
             " move%.l %2,%0\n" \
             " move%.w %3,%1\n" \
             " swap %3\n" \
             " swap %0\n" \
             " mulu%.w %2,%1\n" \
             " mulu%.w %3,%0\n" \
             " mulu%.w %2,%3\n" \
             " swap %2\n" \
             " mulu%.w %5,%2\n" \
             " add%.l %3,%2\n" \
             " jcc 1f\n" \
             " add%.l %#0x10000,%0\n" \
             "1: move%.l %2,%3\n" \
             " clr%.w %2\n" \
             " swap %2\n" \
             " swap %3\n" \
             " clr%.w %3\n" \
             " add%.l %3,%1\n" \
             " addx%.l %2,%0\n" \
             " | End inlined umul_ppmm" \
             : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
               "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
             : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
  } while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400
#endif /* not mc68020 */
/* The '020, '030, '040 and '060 have bitfield insns.
   GCC 3.4 defines __mc68020__ when in CPU32 mode; check for __mcpu32__ to
   exclude bfffo on that chip (bitfield insns not available). */
#if (defined (__mc68020__) || defined (mc68020) \
     || defined (__mc68030__) || defined (mc68030) \
     || defined (__mc68040__) || defined (mc68040) \
     || defined (__mc68060__) || defined (mc68060) \
     || defined (__NeXT__)) \
    && ! defined (__mcpu32__)
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : "=d" ((USItype) (count)) \
           : "od" ((USItype) (x)), "n" (0))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* mc68000 */

#if defined (__m88000__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : "=r" (sh), "=&r" (sl) \
           : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : "=r" (sh), "=&r" (sl) \
           : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined (__m88110__)
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
    (wh) = __x.__i.__h; \
    (wl) = __x.__i.__l; \
  } while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x, __q; \
  __x.__i.__h = (n1); __x.__i.__l = (n0); \
  __asm__ ("divu.d %0,%1,%2" \
           : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
  (r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __m88110__ */
#endif /* __m88000__ */

#if defined (__mips) && W_TYPE_SIZE == 32
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
#else
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
           : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
#endif
#define UMUL_TIME 10
#define UDIV_TIME 100
#endif /* __mips */

#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
#else
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \
           : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
#endif
#define UMUL_TIME 20
#define UDIV_TIME 140
#endif /* __mips */

#if defined (__ns32000__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __x; \
  __asm__ ("meid %2,%0" \
           : "=g" (__x.__ll) \
           : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w; \
  __asm__ ("meid %2,%0" \
           : "=g" (__w) \
           : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
  __w; })
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __x; \
  __x.__i.__h = (n1); __x.__i.__l = (n0); \
  __asm__ ("deid %2,%0" \
           : "=g" (__x.__ll) \
           : "0" (__x.__ll), "g" ((USItype)(d))); \
  (r) = __x.__i.__l; (q) = __x.__i.__h; })
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffsd %2,%0" \
             : "=r" ((USItype) (count)) \
             : "0" ((USItype) 0), "r" ((USItype) (x))); \
  } while (0)
#endif /* __ns32000__ */

/* FIXME: We should test _IBMR2 here when we add assembly support for the
   system vendor compilers. */
#if (defined (_ARCH_PPC) /* AIX */ \
     || defined (_ARCH_PWR) /* AIX */ \
     || defined (__powerpc__) /* gcc */ \
     || defined (__POWERPC__) /* BEOS */ \
     || defined (__ppc__) /* Darwin */ \
     || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
     || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
         && CPU_FAMILY == PPC) \
     ) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
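/* In the special cases above, a compile-time all-zero or all-one high
   word is folded into the carry propagation: aze/addze adds just the
   carry to a single register operand, and ame/addme adds the carry
   minus one, saving a register operand over the general adde/subfe
   forms.  */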
#define count_leading_zeros(count, x) \
  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
    || defined (__ppc__) \
    || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
    || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
        && CPU_FAMILY == PPC)
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants. */

/* We should test _IBMR2 here when we add assembly support for the system
   vendor compilers. */
#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14 /* ??? */
#define UDIV_TIME 120 /* ??? */
#endif /* 64-bit PowerPC. */

#if defined (__pyr__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addw %5,%1\n\taddwc %3,%0" \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subw %5,%1\n\tsubwb %3,%0" \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
  __asm__ ("movw %1,%R0\n\tuemul %2,%0" \
           : "=&r" (__x.__ll) \
           : "g" ((USItype) (u)), "g" ((USItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#endif /* __pyr__ */

#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "r" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
           : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
             "1" ((USItype)(al)), "r" ((USItype)(bl)))
#define smul_ppmm(ph, pl, m0, m1) \
  __asm__ ( \
    "s r2,r2\n" \
    " mts r10,%2\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " m r2,%3\n" \
    " cas %0,r2,r0\n" \
    " mfs r10,%1" \
    : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
    : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
    : "r2")
#define UMUL_TIME 20
#define UDIV_TIME 200
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz %0,%1" \
               : "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \
    else \
      { \
        __asm__ ("clz %0,%1" \
                 : "=r" ((USItype)(count)) : "r" ((USItype)(x))); \
        (count) += 16; \
      } \
  } while (0)
#endif /* RT/ROMP */

#if defined (__sh2__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
           : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
#define UMUL_TIME 5
#endif

#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
           __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
           __CLOBBER_CC)
/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
   doesn't define anything to indicate that to us, it only sets __sparcv8. */
#if defined (__sparc_v9__) || defined (__sparcv9)
/* Perhaps we should use floating-point operations here? */
#if 0
/* Triggers a bug making mpz/tests/t-gcd.c fail.
   Perhaps we simply need to explicitly zero-extend the inputs? */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
           "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
#else
/* Use v8 umul until above bug is fixed. */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#endif
/* Use a plain v8 divide for v9. */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    USItype __q; \
    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
             : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
    (r) = (n0) - __q * (d); \
    (q) = __q; \
  } while (0)
---|
1231 | #else |
---|
1232 | #if defined (__sparc_v8__) /* gcc normal */ \ |
---|
1233 | || defined (__sparcv8) /* gcc solaris */ |
---|
/* Don't match immediate range because: 1) it is not often useful,
   2) the 'I' flag thinks of the range as a 13 bit signed interval,
   while we want to match a 13 bit interval, sign extended to 32 bits,
   but INTERPRETED AS UNSIGNED.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5

#if HAVE_HOST_CPU_supersparc
#define UDIV_TIME 60  /* SuperSPARC timing */
#else
/* Don't use this on SuperSPARC because its udiv only handles 53 bit
   dividends and will trap to the kernel for the rest.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    USItype __q; \
    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
    (r) = (n0) - __q * (d); \
    (q) = __q; \
  } while (0)
#define UDIV_TIME 25
#endif /* HAVE_HOST_CPU_supersparc */

#else /* ! __sparc_v8__ */
#if defined (__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
   instructions, scan (ffs from high bit) and divscc.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
    " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
    " tst %%g0\n" \
    " divscc %3,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%%g1\n" \
    " divscc %%g1,%4,%0\n" \
    " rd %%y,%1\n" \
    " bl,a 1f\n" \
    " add %1,%4,%1\n" \
    "1: ! End of inline udiv_qrnnd" \
    : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
    : "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#define count_leading_zeros(count, x) \
  __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
#endif /* __sparc_v9__ */
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n" \
    " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
    " sra %3,31,%%g2 ! Don't move this insn\n" \
    " and %2,%%g2,%%g2 ! Don't move this insn\n" \
    " andcc %%g0,0,%%g1 ! Don't move this insn\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,%3,%%g1\n" \
    " mulscc %%g1,0,%%g1\n" \
    " add %%g1,%%g2,%0\n" \
    " rd %%y,%1" \
    : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
    : "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39  /* 39 instructions */
#endif
#ifndef udiv_qrnnd
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r; \
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#ifndef UDIV_TIME
#define UDIV_TIME 140
#endif
#endif /* LONGLONG_STANDALONE */
#endif /* udiv_qrnnd */
#endif /* __sparc__ */

#if defined (__sparc__) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ( \
    "addcc %r4,%5,%1\n" \
    " addccc %r6,%7,%%g0\n" \
    " addc %r2,%3,%0" \
    : "=r" (sh), "=&r" (sl) \
    : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
      "%rJ" ((al) >> 32), "rI" ((bl) >> 32) \
    __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ( \
    "subcc %r4,%5,%1\n" \
    " subccc %r6,%7,%%g0\n" \
    " subc %r2,%3,%0" \
    : "=r" (sh), "=&r" (sl) \
    : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \
      "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
    __CLOBBER_CC)
#endif

#if defined (__vax__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
	   : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
	   : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {UDItype __ll; \
	   struct {USItype __l, __h;} __i; \
	  } __x; \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("emul %1,%2,$0,%0" \
	     : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
	   struct {SItype __l, __h;} __i; \
	  } __x; \
    __x.__i.__h = n1; __x.__i.__l = n0; \
    __asm__ ("ediv %3,%2,%0,%1" \
	     : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
  } while (0)
#if 0
/* FIXME: This instruction appears to be unimplemented on some systems (vax
   8800 maybe).  */
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffs 0, 31, %1, %0" \
	     : "=g" ((USItype) (count)) \
	     : "g" ((USItype) (x))); \
  } while (0)
#endif
#endif /* __vax__ */

#if defined (__z8000__) && W_TYPE_SIZE == 16
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
	   : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
	   : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
	     "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
	   : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
	   : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
	     "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union {long int __ll; \
	   struct {unsigned int __h, __l;} __i; \
	  } __x; \
    unsigned int __m0 = (m0), __m1 = (m1); \
    __asm__ ("mult %S0,%H3" \
	     : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
	     : "%1" (__m0), "rQR" (__m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
    (xh) += ((((signed int) __m0 >> 15) & __m1) \
	     + (((signed int) __m1 >> 15) & __m0)); \
  } while (0)
#endif /* __z8000__ */

#endif /* __GNUC__ */

#endif /* NO_ASM */


#if !defined (umul_ppmm) && defined (__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
  { \
    UDWtype __ll = __umulsidi3 (m0, m1); \
    ph = (UWtype) (__ll >> W_TYPE_SIZE); \
    pl = (UWtype) __ll; \
  }
#endif

#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({UWtype __hi, __lo; \
    umul_ppmm (__hi, __lo, u, v); \
    ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
#endif
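
/* A worked check of the two fallbacks above, kept under #if 0 since it is
   illustration only (the function name is ours; assumes W_TYPE_SIZE == 32):
   0xFFFFFFFF * 0xFFFFFFFF = 0xFFFFFFFE00000001, so umul_ppmm must deliver
   high word 0xFFFFFFFE and low word 1, and __umulsidi3 the glued value.  */
#if 0
static void
example_umul_check (void)
{
  UWtype hi, lo;
  umul_ppmm (hi, lo, (UWtype) 0xFFFFFFFF, (UWtype) 0xFFFFFFFF);
  /* hi == 0xFFFFFFFE, lo == 1, and the double-word view matches:
     __umulsidi3 (0xFFFFFFFF, 0xFFFFFFFF)
       == ((UDWtype) 0xFFFFFFFE << W_TYPE_SIZE) | 1  */
}
#endif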


/* Note the prototypes are under !defined (umul_ppmm) etc too, since the HPPA
   versions above are different and we don't want to conflict.  */

#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm
#define mpn_umul_ppmm  __MPN(umul_ppmm)
extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
#define umul_ppmm(wh, wl, u, v) \
  do { \
    mp_limb_t __umul_ppmm__p0; \
    (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \
			     (mp_limb_t) (u), (mp_limb_t) (v)); \
    (wl) = __umul_ppmm__p0; \
  } while (0)
#endif

#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd
#define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)
extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *,
					 mp_limb_t, mp_limb_t, mp_limb_t));
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    mp_limb_t __udiv_qrnnd__r; \
    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
			  (mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) (d)); \
    (r) = __udiv_qrnnd__r; \
  } while (0)
#endif


/* If this machine has no inline assembler, use C macros.  */

#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) + (bl); \
    (sh) = (ah) + (bh) + (__x < (al)); \
    (sl) = __x; \
  } while (0)
#endif

#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    (sh) = (ah) - (bh) - (__x > (al)); \
    (sl) = __x; \
  } while (0)
#endif
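
/* Why the plain C add_ssaaaa/sub_ddmmss above are correct: with
   W_TYPE_SIZE-bit wraparound, al + bl carries iff the truncated sum is
   smaller than an addend, and al - bl borrows iff the truncated difference
   is larger than the minuend.  A worked 32-bit example, under #if 0 as
   illustration only (the function name is ours):  */
#if 0
static void
example_add_ssaaaa (void)
{
  UWtype sh, sl;
  /* 0x00000001:FFFFFFFF + 0x00000000:00000001 = 0x00000002:00000000 */
  add_ssaaaa (sh, sl,
	      (UWtype) 1, (UWtype) 0xFFFFFFFF, (UWtype) 0, (UWtype) 1);
  /* sh == 2, sl == 0: the low-word sum wrapped to 0, (0 < 0xFFFFFFFF)
     detected the carry, and it was added into the high word */
}
#endif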

/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    smul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif

/* If we still don't have umul_ppmm, define it using plain C.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    UWtype __u = (u), __v = (v); \
    \
    __ul = __ll_lowpart (__u); \
    __uh = __ll_highpart (__u); \
    __vl = __ll_lowpart (__v); \
    __vh = __ll_highpart (__v); \
    \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
    \
    __x1 += __ll_highpart (__x0); /* this can't give carry */ \
    __x1 += __x2;		  /* but this indeed can */ \
    if (__x1 < __x2)		  /* did we get it? */ \
      __x3 += __ll_B;		  /* yes, add it in the proper pos. */ \
    \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
  } while (0)
#endif
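
/* The plain C umul_ppmm above is schoolbook multiplication on half-words:
   with B = 2^(W_TYPE_SIZE/2), u = uh*B + ul and v = vh*B + vl give
   u*v = uh*vh*B^2 + (uh*vl + ul*vh)*B + ul*vl.  The one subtle step is that
   __x1 += __x2 can wrap; the lost carry has weight B^2 and is restored into
   __x3 as __ll_B.  A worked 32-bit example, under #if 0 as illustration
   only (the function name is ours):  */
#if 0
static void
example_umul_ppmm_c (void)
{
  UWtype w1, w0;
  /* halves: uh=1, ul=2, vh=3, vl=4, so __x0=8, __x1=6, __x2=4, __x3=3;
     no carry from __x1 += __x2 here, giving w1 = 3, w0 = 0x000A0008 */
  umul_ppmm (w1, w0, (UWtype) 0x00010002, (UWtype) 0x00030004);
}
#endif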

/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
   exist in one form or another).  */
#if !defined (smul_ppmm)
#define smul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    umul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
		- (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif
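
/* The smul/umul conversions above are one identity read in both directions:
   reading a word u as signed subtracts 2^W_TYPE_SIZE when its top bit is
   set, so the signed and unsigned double-word products differ only in the
   high word, by u (if v's top bit is set) plus v (if u's top bit is set).
   The mask -(x >> (W_TYPE_SIZE - 1)) is all-ones exactly when x's top bit
   is set, which is what gets added or subtracted.  A 32-bit sanity check,
   under #if 0 as illustration only (the function name is ours):  */
#if 0
static void
example_smul_umul (void)
{
  UWtype sh, sl, uh, ul;
  umul_ppmm (uh, ul, (UWtype) 0xFFFFFFFF, (UWtype) 0xFFFFFFFF);
  smul_ppmm (sh, sl, (UWtype) 0xFFFFFFFF, (UWtype) 0xFFFFFFFF);
  /* unsigned: uh == 0xFFFFFFFE; signed: (-1)*(-1) == 1 so sh == 0, and
     indeed 0xFFFFFFFE - 0xFFFFFFFF - 0xFFFFFFFF == 0 (mod 2^32);
     both low words are 1 */
}
#endif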

/* Define this unconditionally, so it can be used for debugging.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
    \
    ASSERT ((d) != 0); \
    ASSERT ((n1) < (d)); \
    \
    __d1 = __ll_highpart (d); \
    __d0 = __ll_lowpart (d); \
    \
    __q1 = (n1) / __d1; \
    __r1 = (n1) - __q1 * __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
    if (__r1 < __m) \
      { \
	__q1--, __r1 += (d); \
	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
	  if (__r1 < __m) \
	    __q1--, __r1 += (d); \
      } \
    __r1 -= __m; \
    \
    __q0 = __r1 / __d1; \
    __r0 = __r1 - __q0 * __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
    if (__r0 < __m) \
      { \
	__q0--, __r0 += (d); \
	if (__r0 >= (d)) \
	  if (__r0 < __m) \
	    __q0--, __r0 += (d); \
      } \
    __r0 -= __m; \
    \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)

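/* How callers are expected to use udiv_qrnnd when UDIV_NEEDS_NORMALIZATION
   is 1 (see below): shift the divisor until its most significant bit is
   set, shift the two-word numerator up by the same amount, divide, then
   shift the remainder back down; the quotient is unaffected.  A sketch of
   that calling pattern, under #if 0 as illustration only (the function
   name is ours):  */
#if 0
static void
example_normalized_divide (UWtype *qp, UWtype *rp,
			   UWtype n1, UWtype n0, UWtype d)
{
  UWtype q, r;
  int cnt;
  ASSERT (d != 0 && n1 < d);	/* preconditions, as in __udiv_qrnnd_c */
  count_leading_zeros (cnt, d);
  if (cnt != 0)
    {
      d <<= cnt;		/* top bit of d now set */
      n1 = (n1 << cnt) | (n0 >> (W_TYPE_SIZE - cnt));
      n0 <<= cnt;
    }
  udiv_qrnnd (q, r, n1, n0, d);
  *qp = q;
  *rp = r >> cnt;		/* undo the normalization */
}
#endif
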

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    UWtype __r; \
    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
    \
    if (W_TYPE_SIZE == 32) \
      { \
	__a = __xr < ((UWtype) 1 << 2*__BITS4) \
	  ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1) \
	  : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1 \
	     : 3*__BITS4 + 1); \
      } \
    else \
      { \
	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
	  if (((__xr >> __a) & 0xff) != 0) \
	    break; \
	++__a; \
      } \
    \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
  } while (0)
/* This version gives a well-defined value for zero.  */
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif

#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
#endif
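
/* What the generic count_leading_zeros above requires of __clz_tab: after
   the narrowing step, __xr >> __a fits in 7 bits, and the formula
   count = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a] is exact iff
   __clz_tab[v] is the bit length of v plus one, with __clz_tab[0] == 1
   (which is also what makes COUNT_LEADING_ZEROS_0 come out as
   W_TYPE_SIZE - 1).  The real table is defined elsewhere in the library;
   the generator below, under #if 0, is only our illustration of that law.  */
#if 0
static void
example_build_clz_tab (unsigned char tab[128])
{
  int v, bits;
  for (v = 0; v < 128; v++)
    {
      for (bits = 0; (v >> bits) != 0; bits++)
	;			/* bits = bit length of v (0 for v == 0) */
      tab[v] = bits + 1;
    }
}
#endif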

#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.  */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    UWtype __ctz_c; \
    ASSERT (__ctz_x != 0); \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
    (count) = W_TYPE_SIZE - 1 - __ctz_c; \
  } while (0)
#endif
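
/* The count_trailing_zeros fallback rests on the two's complement identity
   that x & -x isolates the lowest set bit of x: negation flips every bit
   above the lowest 1 and keeps that 1.  If only bit k survives, clz returns
   W_TYPE_SIZE - 1 - k, so subtracting from W_TYPE_SIZE - 1 recovers k.
   A 32-bit sanity check, under #if 0 as illustration only (the function
   name is ours):  */
#if 0
static void
example_ctz (void)
{
  UWtype c;
  /* 0x28 is binary 101000: lowest set bit is bit 3, and indeed
     0x28 & -0x28 == 0x08 */
  count_trailing_zeros (c, (UWtype) 0x28);
  /* c == 3 */
}
#endif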

#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif

/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and
   hence whether the latter should always be used.  */
#ifndef UDIV_PREINV_ALWAYS
#define UDIV_PREINV_ALWAYS 0
#endif

/* Give defaults for UMUL_TIME and UDIV_TIME.  */
#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif

#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif