/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "config.h"
#include "gmp-mparam.h"
/* #include "longlong.h" */

/* When using gcc, make sure to use its builtin alloca. */
#if ! defined (alloca) && defined (__GNUC__)
#define alloca __builtin_alloca
#define HAVE_ALLOCA
#endif

/* When using cc, do whatever necessary to allow use of alloca.  For many
   machines, this means including alloca.h.  IBM's compilers need a #pragma
   in "each module that needs to use alloca". */
#if ! defined (alloca)
/* We need lots of variants for MIPS, to cover all versions and perversions
   of OSes for MIPS. */
#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
 || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
 || defined (__alpha) || defined (__sparc) || defined (sparc) \
 || defined (__ksr__)
#include <alloca.h>
#define HAVE_ALLOCA
#endif
#if defined (_IBMR2)
#pragma alloca
#define HAVE_ALLOCA
#endif
#if defined (__DECC)
#define alloca(x) __ALLOCA(x)
#define HAVE_ALLOCA
#endif
#endif

#if defined (alloca)
#define HAVE_ALLOCA
#endif

#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
#include "stack-alloc.h"
#else
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif

/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
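
/* Illustrative sketch (not part of the library interface): the usual
   temporary-allocation pattern in a routine needing n scratch limbs.

     mp_ptr scratch;
     TMP_DECL (marker);
     TMP_MARK (marker);
     scratch = TMP_ALLOC_LIMBS (n);
     ...                          use the scratch space
     TMP_FREE (marker);
*/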


#if ! defined (__GNUC__)  /* FIXME: Test for C++ compilers here,
                             __DECC understands __inline */
#define inline  /* Empty */
#endif

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x) (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x) ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x) ((x)->_mp_d)
#define LIMBS(x) ((x)->_mp_d)
#define EXP(x) ((x)->_mp_exp)
#define PREC(x) ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)

/* Extra casts because shorts are promoted to ints by "~" and "<<".  "-1"
   rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
   about arithmetic overflow. */
#define UNSIGNED_TYPE_MAX(type) ((type) ~ (type) 0)
#define UNSIGNED_TYPE_HIGHBIT(type) ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
#define SIGNED_TYPE_MIN(type) (((type) -1) << (8*sizeof(type)-1))
#define SIGNED_TYPE_MAX(type) ((type) ~ SIGNED_TYPE_MIN(type))
#define SIGNED_TYPE_HIGHBIT(type) SIGNED_TYPE_MIN(type)

#define MP_LIMB_T_MAX UNSIGNED_TYPE_MAX (mp_limb_t)
#define MP_LIMB_T_HIGHBIT UNSIGNED_TYPE_HIGHBIT (mp_limb_t)

#define MP_SIZE_T_MAX SIGNED_TYPE_MAX (mp_size_t)

#ifndef ULONG_MAX
#define ULONG_MAX UNSIGNED_TYPE_MAX (unsigned long)
#endif
#define ULONG_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned long)
#define LONG_HIGHBIT SIGNED_TYPE_HIGHBIT (long)
#ifndef LONG_MAX
#define LONG_MAX SIGNED_TYPE_MAX (long)
#endif

#ifndef USHORT_MAX
#define USHORT_MAX UNSIGNED_TYPE_MAX (unsigned short)
#endif
#define USHORT_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned short)
#define SHORT_HIGHBIT SIGNED_TYPE_HIGHBIT (short)
#ifndef SHORT_MAX
#define SHORT_MAX SIGNED_TYPE_MAX (short)
#endif


/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y) \
  do { \
    mp_limb_t __mp_limb_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_limb_t_swap__tmp; \
  } while (0)
#define MP_SIZE_T_SWAP(x, y) \
  do { \
    mp_size_t __mp_size_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_size_t_swap__tmp; \
  } while (0)

#define MP_PTR_SWAP(x, y) \
  do { \
    mp_ptr __mp_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_ptr_swap__tmp; \
  } while (0)
#define MP_SRCPTR_SWAP(x, y) \
  do { \
    mp_srcptr __mp_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_srcptr_swap__tmp; \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_PTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_SRCPTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while (0)

#define MPZ_PTR_SWAP(x, y) \
  do { \
    mpz_ptr __mpz_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_ptr_swap__tmp; \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y) \
  do { \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_srcptr_swap__tmp; \
  } while (0)
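
/* Illustrative sketch: the swap macros are convenient for ordering a pair
   of operands, e.g. making sure the first (ptr,size) pair is the larger
   before a multiply-style loop.

     if (xsize < ysize)
       MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
*/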


#if defined (__cplusplus)
extern "C" {
#endif

/* FIXME: These are purely internal, so do a search and replace to change
   them to __gmp forms, rather than using these macros. */
#define _mp_allocate_func __gmp_allocate_func
#define _mp_reallocate_func __gmp_reallocate_func
#define _mp_free_func __gmp_free_func
#define _mp_default_allocate __gmp_default_allocate
#define _mp_default_reallocate __gmp_default_reallocate
#define _mp_default_free __gmp_default_free

extern void * (*_mp_allocate_func) _PROTO ((size_t));
extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
extern void (*_mp_free_func) _PROTO ((void *, size_t));

void *_mp_default_allocate _PROTO ((size_t));
void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
void _mp_default_free _PROTO ((void *, size_t));

#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
#define _MP_ALLOCATE_FUNC_LIMBS(n) _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)

#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
#define _MP_FREE_FUNC_LIMBS(p,n) _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
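
/* Illustrative sketch: heap-allocating and releasing limb space through the
   user-settable allocation functions, as opposed to TMP_ALLOC stack space.

     mp_ptr p = _MP_ALLOCATE_FUNC_LIMBS (n);
     ...
     _MP_FREE_FUNC_LIMBS (p, n);
*/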


#if (__STDC__-0) || defined (__cplusplus)

#else

#define const   /* Empty */
#define signed  /* Empty */

#endif

#if defined (__GNUC__) && defined (__i386__)
#if 0 /* check that these actually improve things */
#define MPN_COPY_INCR(DST, SRC, N) \
  __asm__ ("cld\n\trep\n\tmovsl" : : \
           "D" (DST), "S" (SRC), "c" (N) : \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N) \
  __asm__ ("std\n\trep\n\tmovsl" : : \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
           "cx", "di", "si", "memory")
#define MPN_NORMALIZE_NOT_ZERO(P, N) \
  do { \
    __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \
             "a" (0), "D" ((P) + (N) - 1), "0" (N) : \
             "cx", "di"); \
    (N)++; \
  } while (0)
#endif
#endif

#if HAVE_NATIVE_mpn_copyi
#define mpn_copyi __MPN(copyi)
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

/* Remap names of internal mpn functions. */
#define __clz_tab __MPN(clz_tab)
#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
#define mpn_reciprocal __MPN(reciprocal)

#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
#define mpn_bz_divrem_n __MPN(bz_divrem_n)
/* #define mpn_tdiv_q __MPN(tdiv_q) */

#define mpn_kara_mul_n __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));

#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));

#define mpn_mul_fft __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));

mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */

/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed. */
#ifndef MPN_COPY_INCR
#if HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(DST, SRC, NLIMBS) mpn_copyi (DST, SRC, NLIMBS)
#else
#define MPN_COPY_INCR(DST, SRC, NLIMBS) \
  do { \
    mp_size_t __i; \
    for (__i = 0; __i < (NLIMBS); __i++) \
      (DST)[__i] = (SRC)[__i]; \
  } while (0)
#endif
#endif

#if HAVE_NATIVE_mpn_copyd
#define mpn_copyd __MPN(copyd)
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

/* NLIMBS==0 allowed */
#ifndef MPN_COPY_DECR
#if HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(DST, SRC, NLIMBS) mpn_copyd (DST, SRC, NLIMBS)
#else
#define MPN_COPY_DECR(DST, SRC, NLIMBS) \
  do { \
    mp_size_t __i; \
    for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
      (DST)[__i] = (SRC)[__i]; \
  } while (0)
#endif
#endif

/* Define MPN_COPY for vector computers.  Since #pragma cannot be in a macro,
   rely on function inlining. */
#if defined (_CRAY) || defined (__uxp__)
static inline void
_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
{
  int i;                        /* Faster for Cray with plain int */
#pragma _CRI ivdep              /* Cray PVP systems */
#pragma loop noalias d,s        /* Fujitsu VPP systems */
  for (i = 0; i < n; i++)
    d[i] = s[i];
}
#define MPN_COPY _MPN_COPY
#endif

#ifndef MPN_COPY
#define MPN_COPY MPN_COPY_INCR
#endif

/* Zero NLIMBS *limbs* AT DST. */
#ifndef MPN_ZERO
#define MPN_ZERO(DST, NLIMBS) \
  do { \
    mp_size_t __i; \
    for (__i = 0; __i < (NLIMBS); __i++) \
      (DST)[__i] = 0; \
  } while (0)
#endif

#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS) \
  do { \
    while (NLIMBS > 0) \
      { \
        if ((DST)[(NLIMBS) - 1] != 0) \
          break; \
        NLIMBS--; \
      } \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
  do { \
    while (1) \
      { \
        if ((DST)[(NLIMBS) - 1] != 0) \
          break; \
        NLIMBS--; \
      } \
  } while (0)
#endif
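
/* Illustrative sketch: after an operation that can leave high zero limbs,
   for instance subtracting nearly equal numbers, the result size is
   adjusted with MPN_NORMALIZE before being stored.

     mpn_sub_n (rp, up, vp, n);
     rsize = n;
     MPN_NORMALIZE (rp, rsize);
*/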

/* Strip least significant zero limbs from ptr,size by incrementing ptr and
   decrementing size.  The number in ptr,size must be non-zero, ie. size!=0
   and somewhere a non-zero limb. */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
  do \
    { \
      ASSERT ((size) != 0); \
      while ((ptr)[0] == 0) \
        { \
          (ptr)++; \
          (size)--; \
          ASSERT (size >= 0); \
        } \
    } \
  while (0)

/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments. */
#define MPZ_TMP_INIT(X, NLIMBS) \
  do { \
    mpz_ptr __x = (X); \
    __x->_mp_alloc = (NLIMBS); \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
  } while (0)
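
/* Illustrative sketch: a temporary mpz_t living in TMP_ALLOC space, usable
   as a destination for ordinary mpz routines provided NLIMBS is big enough;
   no mpz_clear is needed since the limbs are released by TMP_FREE.

     mpz_t t;
     TMP_DECL (marker);
     TMP_MARK (marker);
     MPZ_TMP_INIT (t, 2 * ABSIZ (x) + 1);
     mpz_mul (t, x, x);
     ...
     TMP_FREE (marker);
*/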

/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
#define MPZ_REALLOC(what,needed) \
  do { \
    if ((needed) > ALLOC (what)) \
      _mpz_realloc (what, needed); \
  } while (0)
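
/* Illustrative sketch: ensure a result has room for wsize limbs before
   writing into its limb array directly.

     MPZ_REALLOC (w, wsize);
     MPN_COPY (PTR (w), tp, wsize);
     SIZ (w) = wsize;
*/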

/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines. */
#ifndef KARATSUBA_MUL_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD 32
#endif

/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines. */
#ifndef TOOM3_MUL_THRESHOLD
#define TOOM3_MUL_THRESHOLD 256
#endif

#ifndef KARATSUBA_SQR_THRESHOLD
#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
#endif

#ifndef TOOM3_SQR_THRESHOLD
#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
#endif

/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.465. */
#define FFT_FIRST_K 4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef FFT_MODF_MUL_THRESHOLD
#define FFT_MODF_MUL_THRESHOLD (TOOM3_MUL_THRESHOLD * 3)
#endif
#ifndef FFT_MODF_SQR_THRESHOLD
#define FFT_MODF_SQR_THRESHOLD (TOOM3_SQR_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.4 which
   is the first better than toom3. */
#ifndef FFT_MUL_THRESHOLD
#define FFT_MUL_THRESHOLD (FFT_MODF_MUL_THRESHOLD * 10)
#endif
#ifndef FFT_SQR_THRESHOLD
#define FFT_SQR_THRESHOLD (FFT_MODF_SQR_THRESHOLD * 10)
#endif

/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k(). */
#ifndef FFT_MUL_TABLE
#define FFT_MUL_TABLE \
  { TOOM3_MUL_THRESHOLD * 4,   /* k=5 */ \
    TOOM3_MUL_THRESHOLD * 8,   /* k=6 */ \
    TOOM3_MUL_THRESHOLD * 16,  /* k=7 */ \
    TOOM3_MUL_THRESHOLD * 32,  /* k=8 */ \
    TOOM3_MUL_THRESHOLD * 96,  /* k=9 */ \
    TOOM3_MUL_THRESHOLD * 288, /* k=10 */ \
    0 }
#endif
#ifndef FFT_SQR_TABLE
#define FFT_SQR_TABLE \
  { TOOM3_SQR_THRESHOLD * 4,   /* k=5 */ \
    TOOM3_SQR_THRESHOLD * 8,   /* k=6 */ \
    TOOM3_SQR_THRESHOLD * 16,  /* k=7 */ \
    TOOM3_SQR_THRESHOLD * 32,  /* k=8 */ \
    TOOM3_SQR_THRESHOLD * 96,  /* k=9 */ \
    TOOM3_SQR_THRESHOLD * 288, /* k=10 */ \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS static const
#endif

#define MPN_FFT_TABLE_SIZE 16


/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
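
/* Typically used in assertions, for instance to check that a destination
   doesn't overlap a source:

     ASSERT (! MPN_OVERLAP_P (rp, rsize, up, usize));
*/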


/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
   assertion checking is disabled, the expression is still evaluated.  This
   is meant for use with routines like mpn_add_n() where the return value
   represents a carry or whatever that shouldn't occur.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */

#ifdef __LINE__
#define ASSERT_LINE __LINE__
#else
#define ASSERT_LINE -1
#endif

#ifdef __FILE__
#define ASSERT_FILE __FILE__
#else
#define ASSERT_FILE ""
#endif

int __gmp_assert_fail _PROTO((const char *filename, int linenum,
                              const char *expr));

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#if HAVE_VOID
#define CAST_TO_VOID (void)
#else
#define CAST_TO_VOID
#endif

#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))

#if WANT_ASSERT
#define ASSERT(expr) ASSERT_ALWAYS (expr)
#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)

#else
#define ASSERT(expr) (CAST_TO_VOID 0)
#define ASSERT_NOCARRY(expr) (expr)
#endif

#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n) \
  do \
    { \
      mp_ptr __d = (d); \
      mp_srcptr __s = (s); \
      mp_size_t __n = (n); \
      do \
        *__d++ = ~ *__s++; \
      while (--__n); \
    } \
  while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op) \
  do \
    { \
      mp_ptr __d = (d); \
      mp_srcptr __s1 = (s1); \
      mp_srcptr __s2 = (s2); \
      mp_size_t __n = (n); \
      do \
        *__d++ = dop (*__s1++ op s2op *__s2++); \
      while (--__n); \
    } \
  while (0)
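
/* For instance, the mpn_andn_n fallback below expands (apart from the
   pointer and size temporaries) to

     *__d++ = (*__s1++ & ~ *__s2++);

   i.e. dop is empty, op is "&" and s2op is "~". */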

#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
#endif

/* Structure for conversion between internal binary format and
   strings in base 2..36. */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base. */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted. */
  mp_limb_t big_base_inverted;
};

#define __mp_bases __MPN(mp_bases)
extern const struct bases __mp_bases[];
extern mp_size_t __gmp_default_fp_limb_precision;
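
/* For example, with a 32-bit mp_limb_t the base 10 entry has chars_per_limb
   9, chars_per_bit_exactly log(2)/log(10) ~= 0.30103, and big_base 10^9. */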

#if defined (__i386__)
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif

/* Use a library function for invert_limb, if available. */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
#define invert_limb(invxl,xl) (invxl = __MPN(invert_limb) (xl))
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl) \
  do { \
    mp_limb_t dummy; \
    if (xl << 1 == 0) \
      invxl = ~(mp_limb_t) 0; \
    else \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl); \
  } while (0)
#endif

/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R. */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
  do { \
    mp_limb_t _q, _ql, _r; \
    mp_limb_t _xh, _xl; \
    umul_ppmm (_q, _ql, (nh), (di)); \
    _q += (nh);              /* DI is 2**BITS_PER_MP_LIMB too small */ \
    umul_ppmm (_xh, _xl, _q, (d)); \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
    if (_xh != 0) \
      { \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
        _q += 1; \
        if (_xh != 0) \
          { \
            sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
            _q += 1; \
          } \
      } \
    if (_r >= (d)) \
      { \
        _r -= (d); \
        _q += 1; \
      } \
    (r) = _r; \
    (q) = _q; \
  } while (0)
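
/* Illustrative sketch: the usual pattern is to compute the inverse once with
   invert_limb and then divide many two-limb steps by the same normalized
   divisor d (high bit set), much as in an mpn_divrem_1 style loop.

     mp_limb_t dinv, r = 0;
     invert_limb (dinv, d);
     for (i = nsize - 1; i >= 0; i--)
       udiv_qrnnd_preinv (qp[i], r, r, np[i], d, dinv);
*/
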
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)). */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
  do { \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
    mp_limb_t _xh, _xl; \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1)); \
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
    _nadj = _n10 + (_n1 & (dnorm)); \
    umul_ppmm (_xh, _xl, di, _n2 - _n1); \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
    _q1 = ~(_n2 + _xh); \
    umul_ppmm (_xh, _xl, _q1, d); \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
    _xh -= (d); \
    (r) = _xl + ((d) & _xh); \
    (q) = _xh - _q1; \
  } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version to use. */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
  do { \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
    mp_limb_t _xh, _xl; \
    _n2 = (nh); \
    _n10 = (nl); \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
    _nadj = _n10 + (_n1 & (d)); \
    umul_ppmm (_xh, _xl, di, _n2 - _n1); \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
    _q1 = ~(_n2 + _xh); \
    umul_ppmm (_xh, _xl, _q1, d); \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
    _xh -= (d); \
    (r) = _xl + ((d) & _xh); \
    (q) = _xh - _q1; \
  } while (0)


/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
   2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
   "n" must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean's exact division
   paper, end of section 4 (reference in gmp.texi). */

#define modlimb_invert_table __gmp_modlimb_invert_table
extern const unsigned char modlimb_invert_table[128];

#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n) \
  do { \
    mp_limb_t __n = (n); \
    mp_limb_t __inv; \
    ASSERT ((__n & 1) == 1); \
    __inv = modlimb_invert_table[(__n&0xFF)/2];  /*  8 */ \
    __inv = 2 * __inv - __inv * __inv * __n;     /* 16 */ \
    __inv = 2 * __inv - __inv * __inv * __n;     /* 32 */ \
    ASSERT (__inv * __n == 1); \
    (inv) = __inv; \
  } while (0)
#endif

#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n) \
  do { \
    mp_limb_t __n = (n); \
    mp_limb_t __inv; \
    ASSERT ((__n & 1) == 1); \
    __inv = modlimb_invert_table[(__n&0xFF)/2];  /*  8 */ \
    __inv = 2 * __inv - __inv * __inv * __n;     /* 16 */ \
    __inv = 2 * __inv - __inv * __inv * __n;     /* 32 */ \
    __inv = 2 * __inv - __inv * __inv * __n;     /* 64 */ \
    ASSERT (__inv * __n == 1); \
    (inv) = __inv; \
  } while (0)
#endif
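
/* For example, with 32-bit limbs modlimb_invert (inv, 3) gives
   inv = 0xAAAAAAAB, since 3 * 0xAAAAAAAB = 2^33 + 1 == 1 mod 2^32. */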


/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
   between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
   until then. */
#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
/* Define stuff for longlong.h. */
typedef unsigned int UQItype __attribute__ ((mode (QI)));
typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef long SItype;
typedef unsigned long USItype;
#if defined _LONGLONG || defined _LONG_LONG_LIMB
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w. */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB

/* Define ieee_double_extract and _GMP_IEEE_FLOATS. */

#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
/* Special case for little endian ARM since floats remain in big-endian. */
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#else
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
 || defined (__alpha) \
 || defined (__clipper__) \
 || defined (__cris) \
 || defined (__i386__) \
 || defined (__i860__) \
 || defined (__i960__) \
 || defined (MIPSEL) || defined (_MIPSEL) \
 || defined (__ns32000__) \
 || defined (__WINNT) || defined (_WIN32)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive. */
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
 || defined (__a29k__) || defined (_AM29K) \
 || defined (__arm__) \
 || (defined (__convex__) && defined (_IEEE_FLOAT_)) \
 || defined (_CRAYMPP) \
 || defined (__i370__) || defined (__mvs__) \
 || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__) \
 || defined (mc68020) \
 || defined (__m88000__) \
 || defined (MIPSEB) || defined (_MIPSEB) \
 || defined (__hppa) || defined (__hppa__) \
 || defined (__pyr__) \
 || defined (__ibm032__) \
 || defined (_IBMR2) || defined (_ARCH_PPC) \
 || defined (__sh__) \
 || defined (__sparc) || defined (sparc) \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif

/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
   SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
   presumably due to treating the mp_limb_t constant as signed rather than
   unsigned. */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
#if BITS_PER_MP_LIMB == 64
#define LIMBS_PER_DOUBLE 2
#else
#define LIMBS_PER_DOUBLE 3
#endif

double __gmp_scale2 _PROTO ((double, int));
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
#define GMP_ERROR(code) (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
#define DIVIDE_BY_ZERO GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
#define SQRT_OF_NEGATIVE GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)

#if defined _LONG_LONG_LIMB
#if defined (__STDC__)
#define CNST_LIMB(C) C##LL
#else
#define CNST_LIMB(C) C/**/LL
#endif
#else /* not _LONG_LONG_LIMB */
#if defined (__STDC__)
#define CNST_LIMB(C) C##L
#else
#define CNST_LIMB(C) C/**/L
#endif
#endif /* _LONG_LONG_LIMB */

/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
#if BITS_PER_MP_LIMB == 32
#define PP 0xC0CFD797L          /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_MAXPRIME 29
#define PP_MASK 0x208A28A8L
#endif

#if BITS_PER_MP_LIMB == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_MAXPRIME 53
#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
#endif


/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs. */

/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a) \
  (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a) \
  ((a) == 1)

/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
   An mpz_t always has at least one limb of allocated space, so the fetch of
   the low limb is valid. */
#define JACOBI_Z0(a) \
  (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))

/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  (ASSERT (b & 1), (((b) >> 1) ^ (b)))
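
/* For example, b=3 gives (3>>1)^3 = 2, ie. bit 1 set, meaning (2/3) = -1;
   b=7 gives (7>>1)^7 = 4, ie. bit 1 clear, meaning (2/7) = +1. */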

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & (b))

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  ((((a) < 0) & (SIZ(b) < 0)) << 1)

/* (a/b) effect due to sign of b: mpz/signed */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1(b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
   Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((a) & (b))


/* For testing and debugging. */
#define MPZ_CHECK_FORMAT(z) \
  (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0), \
   ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
#define MPZ_PROVOKE_REALLOC(z) \
  do { ALLOC(z) = ABSIZ(z); } while (0)


#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build. */

extern mp_size_t mul_threshold[];
extern mp_size_t fft_modf_mul_threshold;
extern mp_size_t sqr_threshold[];
extern mp_size_t fft_modf_sqr_threshold;
extern mp_size_t bz_threshold[];
extern mp_size_t fib_threshold[];
extern mp_size_t powm_threshold[];
extern mp_size_t gcd_accel_threshold[];
extern mp_size_t gcdext_threshold[];

#undef KARATSUBA_MUL_THRESHOLD
#undef TOOM3_MUL_THRESHOLD
#undef FFT_MUL_TABLE
#undef FFT_MUL_THRESHOLD
#undef FFT_MODF_MUL_THRESHOLD
#undef KARATSUBA_SQR_THRESHOLD
#undef TOOM3_SQR_THRESHOLD
#undef FFT_SQR_TABLE
#undef FFT_SQR_THRESHOLD
#undef FFT_MODF_SQR_THRESHOLD
#undef BZ_THRESHOLD
#undef FIB_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD

#define KARATSUBA_MUL_THRESHOLD mul_threshold[0]
#define TOOM3_MUL_THRESHOLD mul_threshold[1]
#define FFT_MUL_TABLE 0
#define FFT_MUL_THRESHOLD mul_threshold[2]
#define FFT_MODF_MUL_THRESHOLD fft_modf_mul_threshold
#define KARATSUBA_SQR_THRESHOLD sqr_threshold[0]
#define TOOM3_SQR_THRESHOLD sqr_threshold[1]
#define FFT_SQR_TABLE 0
#define FFT_SQR_THRESHOLD sqr_threshold[2]
#define FFT_MODF_SQR_THRESHOLD fft_modf_sqr_threshold
#define BZ_THRESHOLD bz_threshold[0]
#define FIB_THRESHOLD fib_threshold[0]
#define POWM_THRESHOLD powm_threshold[0]
#define GCD_ACCEL_THRESHOLD gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD gcdext_threshold[0]

#define TOOM3_MUL_THRESHOLD_LIMIT 700

#undef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif