/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */


#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

/* On Cray vector systems "short" and "unsigned short" might not be the same
   number of bits, making the SHRT_MAX defaults below fail.  (This depends
   on compiler options.)  Instead use limits.h.  */
#if defined _CRAY
#include <limits.h>
#endif

#if ! __GMP_WITHIN_CONFIGURE
#include "config.h"
#include "gmp-mparam.h"
#endif

#ifdef __cplusplus
#include <string>
#endif

/* Might search and replace _PROTO to __GMP_PROTO internally one day, to
   avoid two names for one thing, but no hurry for that.  */
#define _PROTO(x) __GMP_PROTO(x)
/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be set up appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h.  */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif


/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  */
#if HAVE_ATTRIBUTE_CONST
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif

#ifdef _CRAY
#define CRAY_Pragma(str)  _Pragma (str)
#else
#define CRAY_Pragma(str)
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
#define memset(p, c, n)                 \
  do {                                  \
    int __i;                            \
    ASSERT ((n) >= 0);                  \
    for (__i = 0; __i < (n); __i++)     \
      (p)[__i] = (c);                   \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif
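
/* For illustration only (a hypothetical sketch, not from the GMP sources):
   a varargs function needing two traversals would use va_copy as provided
   or emulated above, e.g.

       void log_twice (const char *fmt, ...)
       {
         va_list  ap, ap2;
         va_start (ap, fmt);
         va_copy (ap2, ap);      second traversal needs its own copy
         vprintf (fmt, ap);
         vprintf (fmt, ap2);
         va_end (ap2);
         va_end (ap);
       }
*/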


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL (marker);
          TMP_MARK (marker);
          ptr = TMP_ALLOC (bytes);
          TMP_FREE (marker);

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.

   The name "marker" isn't used by the malloc-reentrant and debug methods,
   instead they hardcode a name which TMP_ALLOC will know.  For that reason
   two TMP_DECLs are not allowed, unless one is in a nested "{ }" block, and
   in that case TMP_MARK, TMP_ALLOC and TMP_FREE will refer to the TMP_DECL
   which is in scope, irrespective of the marker name given.  */
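
/* For illustration only (a hypothetical routine, not from the GMP sources),
   following the usage rules above: TMP_DECL comes last among the variable
   declarations, and TMP_MARK precedes the first TMP_ALLOC.

       void foo (mp_srcptr up, mp_size_t n)
       {
         mp_ptr  tp;
         TMP_DECL (marker);

         TMP_MARK (marker);
         tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
         ... use tp ...
         TMP_FREE (marker);
       }
*/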

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.  */
#define ROUND_UP_MULTIPLE(a,m)  ((a) + (-(a))%(m))
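
/* For illustration: with "a" unsigned, -(a) wraps modulo a power of 2, so
   (-(a))%(m) is the amount needed to reach the next multiple of m (this
   assumes m divides that power of 2, true of the alignments used here).
   For instance ROUND_UP_MULTIPLE(13,8) is 13 + 3 = 16, and
   ROUND_UP_MULTIPLE(16,8) is 16 + 0 = 16.  */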

#if WANT_TMP_ALLOCA
/* Each TMP_ALLOC is simply an alloca(), and nothing else is needed.
   This is the preferred method.  */
#define TMP_DECL(m)
#define TMP_ALLOC(x)  alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif

#if WANT_TMP_REENTRANT
/* See tal-reent.c for some comments. */
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
void *__gmp_tmp_reentrant_alloc _PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
void  __gmp_tmp_reentrant_free _PROTO ((struct tmp_reentrant_t *));
#define TMP_DECL(marker)   struct tmp_reentrant_t *__tmp_marker
/* don't demand NULL, just cast a zero */
#define TMP_MARK(marker) \
  do { __tmp_marker = (struct tmp_reentrant_t *) 0; } while (0)
#define TMP_ALLOC(size)    __gmp_tmp_reentrant_alloc (&__tmp_marker, size)
#define TMP_FREE(marker)   __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
/* See tal-notreent.c for some comments. */
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
typedef struct tmp_marker tmp_marker;
void *__gmp_tmp_alloc _PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
void __gmp_tmp_mark _PROTO ((tmp_marker *));
void __gmp_tmp_free _PROTO ((tmp_marker *));
#define TMP_DECL(marker)  tmp_marker marker
/* gcc recognises "(-(8*n))%8" or the like is always zero, which means the
   rounding up is a noop for allocs of whole limbs. */
#define TMP_ALLOC(size) \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (size), __TMP_ALIGN))
#define TMP_MARK(marker)  __gmp_tmp_mark (&marker)
#define TMP_FREE(marker)  __gmp_tmp_free (&marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  char                      *block;
  size_t                    size;
};
void  __gmp_tmp_debug_mark  _PROTO ((const char *, int, struct tmp_debug_t **,
                                     struct tmp_debug_t *,
                                     const char *, const char *));
void *__gmp_tmp_debug_alloc _PROTO ((const char *, int, int,
                                     struct tmp_debug_t **, const char *,
                                     size_t)) ATTRIBUTE_MALLOC;
void  __gmp_tmp_debug_free  _PROTO ((const char *, int, int,
                                     struct tmp_debug_t **,
                                     const char *, const char *));
#if HAVE_STRINGIZE
#define TMP_DECL(marker) TMP_DECL_NAME(marker, #marker)
#define TMP_MARK(marker) TMP_MARK_NAME(marker, #marker)
#define TMP_FREE(marker) TMP_FREE_NAME(marker, #marker)
#else
#define TMP_DECL(marker) TMP_DECL_NAME(marker, "marker")
#define TMP_MARK(marker) TMP_MARK_NAME(marker, "marker")
#define TMP_FREE(marker) TMP_FREE_NAME(marker, "marker")
#endif
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char          *__tmp_marker_name = marker_name;         \
  struct tmp_debug_t   __tmp_marker_struct;                     \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE,             \
                          &__tmp_marker, &__tmp_marker_struct,  \
                          __tmp_marker_name, marker_name);      \
  } while (0)
#define TMP_ALLOC(size)                                         \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,              \
                         __tmp_marker_inscope,                  \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE,             \
                          marker, &__tmp_marker,                \
                          __tmp_marker_name, marker_name);      \
  } while (0)
#endif


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#if WANT_TMP_DEBUG
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)   \
  do {                                          \
    (xp) = TMP_ALLOC_LIMBS (xsize);             \
    (yp) = TMP_ALLOC_LIMBS (ysize);             \
  } while (0)
#else
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)   \
  do {                                          \
    (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
    (yp) = (xp) + (xsize);                      \
  } while (0)
#endif
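
/* For illustration: TMP_ALLOC_LIMBS_2 suits the common pattern of a routine
   wanting two scratch areas at once (within a TMP_MARKed region), e.g.

       mp_ptr  tp, sp;
       TMP_ALLOC_LIMBS_2 (tp, n+1, sp, 2*n);

   which in the non-debug case costs only a single underlying allocation.  */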


/* From gmp.h, nicer names for internal use. */
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x)   ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x)   ((x)->_mp_d)
#define LIMBS(x) ((x)->_mp_d)
#define EXP(x)   ((x)->_mp_exp)
#define PREC(x)  ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)

/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that. */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
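
/* For illustration: POW2_P(16) is (16 & 15) == 0, hence true; POW2_P(12) is
   (12 & 11) == 8, non-zero, hence false.  And as the comment above warns,
   POW2_P(0) comes out true even though 0 isn't a power of 2.  */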


/* Might be already defined by gmp-mparam.h, otherwise use what's in gmp.h. */
#ifndef BITS_PER_MP_LIMB
#define BITS_PER_MP_LIMB  __GMP_BITS_PER_MP_LIMB
#endif


/* The "short" defines are a bit different because shorts are promoted to
   ints by ~ or >> etc.  */

#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef DIV_SB_PREINV_THRESHOLD
#undef DIV_DC_THRESHOLD
#undef POWM_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD    MP_SIZE_T_MAX
#define DIV_DC_THRESHOLD           50
#define POWM_THRESHOLD             0

#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#define GCD_ACCEL_THRESHOLD        3
#define GCDEXT_THRESHOLD           20

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef USE_PREINV_MOD_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#undef MODEXACT_1_ODD_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD    MP_SIZE_T_MAX  /* preinv always */
#define DIVREM_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX  /* always */
#define MOD_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* always */
#define MOD_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* always */
#define USE_PREINV_DIVREM_1        0              /* preinv never */
#define USE_PREINV_MOD_1           0              /* preinv never */
#define DIVREM_2_THRESHOLD         MP_SIZE_T_MAX  /* preinv never */
#define DIVEXACT_1_THRESHOLD       MP_SIZE_T_MAX  /* always */
#define MODEXACT_1_ODD_THRESHOLD   MP_SIZE_T_MAX  /* always */

#undef GET_STR_DC_THRESHOLD
#undef GET_STR_PRECOMPUTE_THRESHOLD
#undef SET_STR_THRESHOLD
#define GET_STR_DC_THRESHOLD          22
#define GET_STR_PRECOMPUTE_THRESHOLD  42
#define SET_STR_THRESHOLD             3259

#undef MUL_FFT_TABLE
#undef MUL_FFT_MODF_THRESHOLD
#undef MUL_FFT_THRESHOLD
#define MUL_FFT_TABLE  { 400, 928, 1856, 3840, 7168, 20480, 0 }
#define MUL_FFT_MODF_THRESHOLD  416
#define MUL_FFT_THRESHOLD       MP_SIZE_T_MAX

#undef SQR_FFT_TABLE
#undef SQR_FFT_MODF_THRESHOLD
#undef SQR_FFT_THRESHOLD
#define SQR_FFT_TABLE  { 400, 992, 1984, 3840, 9216, 20480, 0 }
#define SQR_FFT_MODF_THRESHOLD  416
#define SQR_FFT_THRESHOLD       MP_SIZE_T_MAX

#endif

/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)                       \
  do {                                          \
    mp_ptr __mp_ptr_swap__tmp = (x);            \
    (x) = (y);                                  \
    (y) = __mp_ptr_swap__tmp;                   \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)              \
  do {                                          \
    MP_PTR_SWAP (xp, yp);                       \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)           \
  do {                                          \
    MP_SRCPTR_SWAP (xp, yp);                    \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)

#define MPZ_PTR_SWAP(x, y)                      \
  do {                                          \
    mpz_ptr __mpz_ptr_swap__tmp = (x);          \
    (x) = (y);                                  \
    (y) = __mpz_ptr_swap__tmp;                  \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)
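
/* For illustration: the pairwise swaps are typically used to canonicalize
   operand order, e.g. to make the first operand the larger one:

       if (xsize < ysize)
         MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
*/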


void *__gmp_default_allocate _PROTO ((size_t));
void *__gmp_default_reallocate _PROTO ((void *, size_t, size_t));
void __gmp_default_free _PROTO ((void *, size_t));

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)  __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)         \
  ((type *) (*__gmp_reallocate_func)                                    \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type)  (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)      __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)              \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);         \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)
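
/* For illustration: heap storage that must outlive the current function
   goes through the user-settable memory functions, e.g.

       mp_ptr p = __GMP_ALLOCATE_FUNC_LIMBS (n);
       ... use p ...
       __GMP_FREE_FUNC_LIMBS (p, n);

   whereas TMP_ALLOC above is for space released before returning.  */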


/* Dummy for non-gcc, code involving it will go dead. */
#ifndef __GNUC__
#define __builtin_constant_p(x)  0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n)       __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif


void __gmpz_aorsmul_1 _PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub))) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
void    mpz_n_pow_ui _PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));


#define mpn_add_nc __MPN(add_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_addsub_n __MPN(addsub_n)
__GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_addsub_nc __MPN(addsub_nc)
__GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));

#define mpn_fib2_ui __MPN(fib2_ui)
mp_size_t mpn_fib2_ui _PROTO ((mp_ptr, mp_ptr, unsigned long));

/* Remap names of internal mpn functions. */
#define __clz_tab        __MPN(clz_tab)
#define mpn_udiv_w_sdiv  __MPN(udiv_w_sdiv)

#define mpn_gcd_finda __MPN(gcd_finda)
mp_limb_t mpn_gcd_finda _PROTO((const mp_limb_t cp[2])) __GMP_ATTRIBUTE_PURE;

#define mpn_jacobi_base __MPN(jacobi_base)
int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)) ATTRIBUTE_CONST;

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_mul_2 __MPN(mul_2)
mp_limb_t mpn_mul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_sqr_n __MPN(sqr_n)
__GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_sub_nc __MPN(sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));


typedef __gmp_randstate_struct *gmp_randstate_ptr;

#define _gmp_rand __gmp_rand
__GMP_DECLSPEC void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                   \
  ((__gmp_rands_initialized ? 0                 \
    : (__gmp_rands_initialized = 1,             \
       gmp_randinit_default (__gmp_rands), 0)), \
   __gmp_rands)
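
/* For illustration: RANDS initializes the global state on first use and
   then evaluates to it, so it can be passed wherever a gmp_randstate_t is
   wanted, e.g.

       mp_limb_t  r;
       _gmp_rand (&r, RANDS, GMP_NUMB_BITS);
*/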

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)


/* kara uses n+1 limbs of temporary space and then recurses with the
   balance, so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ... */
#define MPN_KARA_MUL_N_TSIZE(n)   (2*((n)+BITS_PER_MP_LIMB))
#define MPN_KARA_SQR_N_TSIZE(n)   (2*((n)+BITS_PER_MP_LIMB))

/* toom3 uses 4*(ceil(n/3)) of temporary space and then recurses with the
   balance either into itself or kara.  The following might be
   overestimates. */
#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 3*BITS_PER_MP_LIMB)
#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 3*BITS_PER_MP_LIMB)

/* need 2 so that n2>=1 */
#define MPN_KARA_MUL_N_MINSIZE    2
#define MPN_KARA_SQR_N_MINSIZE    2

/* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
#define MPN_TOOM3_MUL_N_MINSIZE   11
#define MPN_TOOM3_SQR_N_MINSIZE   11

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_kara_mul_n __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));


#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)) ATTRIBUTE_CONST;

#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t,
                                    mp_srcptr, mp_size_t));

#define mpn_dc_divrem_n __MPN(dc_divrem_n)
mp_limb_t mpn_dc_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));

/* #define mpn_tdiv_q  __MPN(tdiv_q) */
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */

#define mpz_divexact_gcd __gmpz_divexact_gcd
void mpz_divexact_gcd _PROTO ((mpz_ptr q, mpz_srcptr a, mpz_srcptr d));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
size_t mpz_inp_str_nowhite _PROTO ((mpz_ptr x, FILE *stream, int base, int c, size_t nread));
#endif

#define mpn_divisible_p __MPN(divisible_p)
int mpn_divisible_p _PROTO ((mp_srcptr ap, mp_size_t asize,
                             mp_srcptr dp, mp_size_t dsize)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __gmpn_rootrem
mp_size_t mpn_rootrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));


/* from gmp.h */
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    __GMPN_COPY_INCR (dst, src, size);                  \
  } while (0)
#endif

#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                                \
  do {                                                            \
    int __i;            /* Faster on some Crays with plain int */ \
    _Pragma ("_CRI ivdep");                                       \
    for (__i = 0; __i < (n); __i++)                               \
      (dst)[__i] = (src)[__i];                                    \
  } while (0)
#endif

#define mpn_copyi __MPN(copyi)
__GMP_DECLSPEC void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    mpn_copyi (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)              \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));  \
    if ((n) != 0)                               \
      {                                         \
        mp_size_t __n = (n) - 1;                \
        mp_ptr __dst = (dst);                   \
        mp_srcptr __src = (src);                \
        mp_limb_t __x;                          \
        __x = *__src++;                         \
        if (__n != 0)                           \
          {                                     \
            do                                  \
              {                                 \
                *__dst++ = __x;                 \
                __x = *__src++;                 \
              }                                 \
            while (--__n);                      \
          }                                     \
        *__dst++ = __x;                         \
      }                                         \
  } while (0)
#endif


/* As per __GMPN_COPY_INCR in gmp.h. */
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    if ((size) != 0)                                    \
      {                                                 \
        mp_ptr     __dst = (dst) + (size);              \
        mp_srcptr  __src = (src) + (size);              \
        mp_size_t  __size = (size);                     \
        do                                              \
          *--__dst = *--__src;                          \
        while (--__size != 0);                          \
      }                                                 \
  } while (0)
#endif

#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                                \
  do {                                                            \
    int __i;            /* Faster on some Crays with plain int */ \
    _Pragma ("_CRI ivdep");                                       \
    for (__i = (n) - 1; __i >= 0; __i--)                          \
      (dst)[__i] = (src)[__i];                                    \
  } while (0)
#endif

#define mpn_copyd __MPN(copyd)
__GMP_DECLSPEC void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    mpn_copyd (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
#if ! defined (MPN_COPY_DECR)
#define MPN_COPY_DECR(dst, src, n)              \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));  \
    if ((n) != 0)                               \
      {                                         \
        mp_size_t __n = (n) - 1;                \
        mp_ptr __dst = (dst) + __n;             \
        mp_srcptr __src = (src) + __n;          \
        mp_limb_t __x;                          \
        __x = *__src--;                         \
        if (__n != 0)                           \
          {                                     \
            do                                  \
              {                                 \
                *__dst-- = __x;                 \
                __x = *__src--;                 \
              }                                 \
            while (--__n);                      \
          }                                     \
        *__dst-- = __x;                         \
      }                                         \
  } while (0)
#endif


#ifndef MPN_COPY
#define MPN_COPY(d,s,n)                         \
  do {                                          \
    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
    MPN_COPY_INCR (d, s, n);                    \
  } while (0)
#endif


/* Set {dst,size} to the limbs of {src,size} in reverse order. */
#define MPN_REVERSE(dst, src, size)                     \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        *__dst = *__src;                                \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)


/* Zero n limbs at dst.

   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
   ppc630 for instance this is optimal since it can sustain only 1 store per
   cycle.

   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
   "for" loop in the generic code below can become stu/bdnz.  The do/while
   here helps it get to that.  The same caveat about plain -mpowerpc64 mode
   applies here as to __GMPN_COPY_INCR in gmp.h.

   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
   this loop too.

   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
   at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
   trouble than it's worth to do the same, though perhaps a call to memset
   would be good when on a GNU system.  */

#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst) - 1;               \
        mp_size_t __n = (n);                    \
        do                                      \
          *++__dst = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif

#ifndef MPN_ZERO
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst);                   \
        mp_size_t __n = (n);                    \
        do                                      \
          *__dst++ = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif


/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
   start up and would need to strip a lot of zeros before it'd be faster
   than a simple cmpl loop.  Here are some times in cycles for
   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
   low zeros).

                std   cld
        P5       18    16
        P6       46    38
        K6       36    13
        K7       21    20
*/
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)              \
  do {                                          \
    while (NLIMBS > 0)                          \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    ASSERT ((NLIMBS) >= 1);                     \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif

/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
   somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT ((low) == (ptr)[0]);                         \
                                                        \
    while ((low) == 0)                                  \
      {                                                 \
        (size)--;                                       \
        ASSERT ((size) >= 1);                           \
        (ptr)++;                                        \
        (low) = *(ptr);                                 \
      }                                                 \
  } while (0)
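
/* For illustration: given limbs {0, 0, 5, 9} at ptr with size = 4 and
   low = ptr[0], the macro above leaves ptr advanced by two limbs,
   size = 2 and low = 5.  */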

/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                                         \
  do {                                                                  \
    mpz_ptr __x = (X);                                                  \
    ASSERT ((NLIMBS) >= 1);                                             \
    __x->_mp_alloc = (NLIMBS);                                          \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
  } while (0)

/* Realloc for an mpz_t z if it has less than n limbs. */
#define MPZ_REALLOC(z,n) ((n) > ALLOC(z) ? _mpz_realloc(z,n) : PTR(z))

#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)


/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   f1p.

   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.

   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
   without good floating point.  There's +2 for rounding up, and a further
   +2 since at the last step x limbs are doubled into a 2x+1 limb region
   whereas the actual F[2k] value might be only 2x-1 limbs.

   Note that a division is done first, since on a 32-bit system it's at
   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
   whatever a multiply of two 190Mbyte numbers takes.)

   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
   worked into the multiplier.  */

#define MPN_FIB2_SIZE(n) \
  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
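
/* For illustration: with 32-bit limbs and n=1000 the macro computes
   (1000/32)*23 = 31*23 = 713 bits, 713/32 = 22 limbs, +4 giving 26 limbs,
   comfortably above the roughly 695 bits (22 limbs) F[1000] needs.  */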


/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
   -1 <= n <= FIB_TABLE_LIMIT.

   FIB_TABLE_LUCNUM_LIMIT is the largest n for which L[n] = F[n] + 2*F[n-1]
   fits in a limb.

   This data generated by code at the end of mpn/generic/fib2_ui.c. */

__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])

#if GMP_NUMB_BITS >= 64
#define FIB_TABLE_LIMIT         93
#define FIB_TABLE_LUCNUM_LIMIT  92
#else
#if GMP_NUMB_BITS >= 32
#define FIB_TABLE_LIMIT         47
#define FIB_TABLE_LUCNUM_LIMIT  46
#else
#if GMP_NUMB_BITS >= 16
#define FIB_TABLE_LIMIT         24
#define FIB_TABLE_LUCNUM_LIMIT  23
#else
#if GMP_NUMB_BITS >= 8
#define FIB_TABLE_LIMIT         13
#define FIB_TABLE_LUCNUM_LIMIT  11
#else
#if GMP_NUMB_BITS >= 4
#define FIB_TABLE_LIMIT         7
#define FIB_TABLE_LUCNUM_LIMIT  5
#endif /* 4 */
#endif /* 8 */
#endif /* 16 */
#endif /* 32 */
#endif /* 64 */


/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))

/* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef MUL_KARATSUBA_THRESHOLD
#define MUL_KARATSUBA_THRESHOLD 32
#endif

/* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef MUL_TOOM3_THRESHOLD
#define MUL_TOOM3_THRESHOLD 256
#endif

/* This is the threshold at which mpn_sqr_basecase should take over from
   mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
   always.

   If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
   karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
   more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
   which mpn_sqr_basecase should be used, and that may be never.  */

#ifndef SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD 0
#endif

#ifndef SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
#endif

#ifndef SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD (2*MUL_TOOM3_THRESHOLD)
#endif

/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at log(5)/log(3) = 1.465.  */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
#endif
#ifndef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
#endif
#ifndef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
#endif

/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k(). */
#ifndef MUL_FFT_TABLE
#define MUL_FFT_TABLE                           \
  { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef SQR_FFT_TABLE
#define SQR_FFT_TABLE                           \
  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16


/* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
   div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
   i.e. n/2 >= MUL_KARATSUBA_THRESHOLD

   Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
   for 3 as an average.  */

#ifndef DIV_DC_THRESHOLD
#define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
#endif


/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize)     \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
  (   (char *) (xp) + (xsize) > (char *) (yp)   \
   && (char *) (yp) + (ysize) > (char *) (xp))

/* Return non-zero if xp,xsize and yp,ysize are either identical or not
   overlapping.  Return zero if they're partially overlapping. */
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)            \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)   \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))

/* Return non-zero if dst,dsize and src,ssize are either identical or
   overlapping in a way suitable for an incrementing/decrementing algorithm.
   Return zero if they're partially overlapping in an unsuitable fashion. */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size)      \
  MPN_SAME_OR_INCR2_P(dst, size, src, size)
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size)      \
  MPN_SAME_OR_DECR2_P(dst, size, src, size)
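
/* For illustration: with limbs at p, MPN_OVERLAP_P (p, 4, p+2, 4) is true
   since the ranges share p[2] and p[3], while MPN_OVERLAP_P (p, 2, p+2, 4)
   is false because the first range ends exactly where the second begins.  */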
---|
1291 | |
---|

/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R). */

#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

void __gmp_assert_header _PROTO ((const char *filename, int linenum));
__GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#define ASSERT_ALWAYS(expr)     \
  do {                          \
    if (!(expr))                \
      ASSERT_FAIL (expr);       \
  } while (0)

#if WANT_ASSERT
#define ASSERT(expr)   ASSERT_ALWAYS (expr)
#else
#define ASSERT(expr)   do {} while (0)
#endif


/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
   that it's zero.  In both cases if assertion checking is disabled the
   expression is still evaluated.  These macros are meant for use with
   routines like mpn_add_n() where the return value represents a carry or
   whatever that should or shouldn't occur in some context.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#if WANT_ASSERT
#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT_CARRY(expr)     (expr)
#define ASSERT_NOCARRY(expr)   (expr)
#endif


/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
   same as writing "#if WANT_ASSERT", but more compact.  */
#if WANT_ASSERT
#define ASSERT_CODE(expr)  expr
#else
#define ASSERT_CODE(expr)
#endif
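
/* Usage sketch (an editorial example, not from the sources; "rp" and
   "__saved" are hypothetical names): ASSERT_CODE keeps a check's scratch
   state out of a normal build entirely.

     ASSERT_CODE (mp_limb_t  __saved = rp[0]);
     ... some operation expected to leave rp[0] unchanged ...
     ASSERT (rp[0] == __saved);  */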


/* Test that an mpq_t is in fully canonical form.  This can be used as
   protection on routines like mpq_equal which give wrong results on
   non-canonical inputs.  */
#if WANT_ASSERT
#define ASSERT_MPQ_CANONICAL(q)                         \
  do {                                                  \
    ASSERT (q->_mp_den._mp_size > 0);                   \
    if (q->_mp_num._mp_size == 0)                       \
      {                                                 \
        /* zero should be 0/1 */                        \
        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
      }                                                 \
    else                                                \
      {                                                 \
        /* no common factors */                         \
        mpz_t  __g;                                     \
        mpz_init (__g);                                 \
        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
        ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
        mpz_clear (__g);                                \
      }                                                 \
  } while (0)
#else
#define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
#endif

/* Check that the nail parts are zero. */
#define ASSERT_ALWAYS_LIMB(limb)                \
  do {                                          \
    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
    ASSERT_ALWAYS (__nail == 0);                \
  } while (0)
#define ASSERT_ALWAYS_MPN(ptr, size)            \
  do {                                          \
    /* let whole loop go dead when no nails */  \
    if (GMP_NAIL_BITS != 0)                     \
      {                                         \
        mp_size_t  __i;                         \
        for (__i = 0; __i < (size); __i++)      \
          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
      }                                         \
  } while (0)
#if WANT_ASSERT
#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
#else
#define ASSERT_LIMB(limb)       do {} while (0)
#define ASSERT_MPN(ptr, size)   do {} while (0)
#endif


/* Assert that an mpn region {ptr,size} is zero, or non-zero.
   size==0 is allowed, and in that case {ptr,size} is considered to be
   zero.  */
#if WANT_ASSERT
#define ASSERT_MPN_ZERO_P(ptr,size)     \
  do {                                  \
    mp_size_t  __i;                     \
    ASSERT ((size) >= 0);               \
    for (__i = 0; __i < (size); __i++)  \
      ASSERT ((ptr)[__i] == 0);         \
  } while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)  \
  do {                                  \
    mp_size_t  __i;                     \
    int        __nonzero = 0;           \
    ASSERT ((size) >= 0);               \
    for (__i = 0; __i < (size); __i++)  \
      if ((ptr)[__i] != 0)              \
        {                               \
          __nonzero = 1;                \
          break;                        \
        }                               \
    ASSERT (__nonzero);                 \
  } while (0)
#else
#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
#endif


#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                                \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s = (s);                               \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
    do                                                  \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
    while (--__n);                                      \
  } while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
    do                                                  \
      operation;                                        \
    while (--__n);                                      \
  } while (0)

#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ & *__s2++))
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ | *__s2++))
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ ^ *__s2++))
#endif

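/* Illustrative note (editorial, not from the sources): with no native
   routine, mpn_xor_n (dp, ap, bp, n) expands through MPN_LOGOPS_N_INLINE
   to the overlap ASSERTs above followed by a loop equivalent to

     mp_size_t  i;
     for (i = 0; i < n; i++)
       dp[i] = ap[i] ^ bp[i];

   the "operation" argument supplying just the loop body.  */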

/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
   subtract.  */
#if GMP_NAIL_BITS == 0
#define SUBC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __x = (x);               \
    mp_limb_t  __y = (y);               \
    mp_limb_t  __w = __x - __y;         \
    (w) = __w;                          \
    (cout) = __w > __x;                 \
  } while (0)
#else
#define SUBC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __w = (x) - (y);         \
    (w) = __w & GMP_NUMB_MASK;          \
    (cout) = __w >> (GMP_LIMB_BITS-1);  \
  } while (0)
#endif

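/* Worked example (editorial): with GMP_NAIL_BITS==0 and 32-bit limbs,
   SUBC_LIMB (c, w, 2, 3) leaves w = 0xFFFFFFFF and c = 1, the wrap-around
   being detected by the __w > __x test.  */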

/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
   expecting no carry (or borrow) from that.

   The size parameter is only for the benefit of assertion checking.  In a
   normal build it's unused and the carry/borrow is just propagated as far
   as it needs to go.

   On random data, usually only one or two limbs of {ptr,size} get updated,
   so there's no need for any sophisticated looping, just something compact
   and sensible.

   FIXME: Do the generic MPN_{INCR,DECR}_U with a block of code like
   mpn_incr_u but with the assertions built in, rather than the separate
   add_1 and sub_1 when assertion checking.

   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
   declaring their operand sizes, then remove the former.  This is purely
   for the benefit of assertion checking.  */

#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
  && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
/* Better flags handling than the generic C gives on i386, saving a few
   bytes of code and maybe a cycle or two.  */

#define MPN_IORD_U(ptr, incr, aors)                     \
  do {                                                  \
    mp_ptr  __ptr_dummy;                                \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        __asm__ __volatile__                            \
          ("\n" ASM_L(top) ":\n"                        \
           "\t" aors " $1, (%0)\n"                      \
           "\tleal 4(%0),%0\n"                          \
           "\tjc " ASM_L(top)                           \
           : "=r" (__ptr_dummy)                         \
           : "0"  (ptr)                                 \
           : "memory");                                 \
      }                                                 \
    else                                                \
      {                                                 \
        __asm__ __volatile__                            \
          (   aors  " %2,(%0)\n"                        \
           "\tjnc " ASM_L(done) "\n"                    \
           ASM_L(top) ":\n"                             \
           "\t" aors " $1,4(%0)\n"                      \
           "\tleal 4(%0),%0\n"                          \
           "\tjc " ASM_L(top) "\n"                      \
           ASM_L(done) ":\n"                            \
           : "=r" (__ptr_dummy)                         \
           : "0"  (ptr),                                \
             "ri" (incr)                                \
           : "memory");                                 \
      }                                                 \
  } while (0)

#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
#endif

#if GMP_NAIL_BITS == 0
#ifndef mpn_incr_u
#define mpn_incr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        while (++(*(__p++)) == 0)                       \
          ;                                             \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p + (incr);                            \
        *__p = __x;                                     \
        if (__x < (incr))                               \
          while (++(*(++__p)) == 0)                     \
            ;                                           \
      }                                                 \
  } while (0)
#endif
#ifndef mpn_decr_u
#define mpn_decr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        while ((*(__p++))-- == 0)                       \
          ;                                             \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p;                                     \
        *__p = __x - (incr);                            \
        if (__x < (incr))                               \
          while ((*(++__p))-- == 0)                     \
            ;                                           \
      }                                                 \
  } while (0)
#endif
#endif

#if GMP_NAIL_BITS >= 1
#ifndef mpn_incr_u
#define mpn_incr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        do                                              \
          {                                             \
            __x = (*__p + 1) & GMP_NUMB_MASK;           \
            *__p++ = __x;                               \
          }                                             \
        while (__x == 0);                               \
      }                                                 \
    else                                                \
      {                                                 \
        __x = (*__p + (incr));                          \
        *__p++ = __x & GMP_NUMB_MASK;                   \
        if (__x >> GMP_NUMB_BITS != 0)                  \
          {                                             \
            do                                          \
              {                                         \
                __x = (*__p + 1) & GMP_NUMB_MASK;       \
                *__p++ = __x;                           \
              }                                         \
            while (__x == 0);                           \
          }                                             \
      }                                                 \
  } while (0)
#endif
#ifndef mpn_decr_u
#define mpn_decr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        do                                              \
          {                                             \
            __x = *__p;                                 \
            *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
          }                                             \
        while (__x == 0);                               \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p - (incr);                            \
        *__p++ = __x & GMP_NUMB_MASK;                   \
        if (__x >> GMP_NUMB_BITS != 0)                  \
          {                                             \
            do                                          \
              {                                         \
                __x = *__p;                             \
                *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
              }                                         \
            while (__x == 0);                           \
          }                                             \
      }                                                 \
  } while (0)
#endif
#endif

#ifndef MPN_INCR_U
#if WANT_ASSERT
#define MPN_INCR_U(ptr, size, n)                        \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
  } while (0)
#else
#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
#endif
#endif

#ifndef MPN_DECR_U
#if WANT_ASSERT
#define MPN_DECR_U(ptr, size, n)                        \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
  } while (0)
#else
#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
#endif
#endif

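/* Usage sketch (editorial; the names rp, mid, rsize and cy are
   hypothetical): a carry known to propagate into the high part of a result
   can be applied with

     MPN_INCR_U (rp + mid, rsize - mid, cy);

   where a normal build touches only the limbs the carry actually reaches,
   and a WANT_ASSERT build uses mpn_add_1 over the declared size to check
   the carry really doesn't run off the end.  */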

/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base.  */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted. */
  mp_limb_t big_base_inverted;
};

#define   mp_bases __MPN(bases)
#define __mp_bases __MPN(bases)
__GMP_DECLSPEC extern const struct bases mp_bases[257];

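/* For instance (editorial note, values as per the base-10 defines below):
   with 32-bit limbs, chars_per_limb for base 10 is 9 since 10**9 =
   0x3b9aca00 still fits a limb while 10**10 does not; big_base is that
   10**9; and big_base_inverted is the fixed-point reciprocal of it used
   for division by big_base.  */
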
/* mp_bases[10] values, generated by mpn/mp_bases.c */
#if GMP_NUMB_BITS == 4
#define MP_BASES_CHARS_PER_LIMB_10      1
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0xa)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x9)
#define MP_BASES_NORMALIZATION_STEPS_10 0
#endif
#if GMP_NUMB_BITS == 8
#define MP_BASES_CHARS_PER_LIMB_10      2
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x64)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x47)
#define MP_BASES_NORMALIZATION_STEPS_10 1
#endif
#if GMP_NUMB_BITS == 16
#define MP_BASES_CHARS_PER_LIMB_10      4
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x2710)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0xa36e)
#define MP_BASES_NORMALIZATION_STEPS_10 2
#endif
#if GMP_NUMB_BITS == 28
#define MP_BASES_CHARS_PER_LIMB_10      8
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x5f5e100)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x5798ee23)
#define MP_BASES_NORMALIZATION_STEPS_10 5
#endif
#if GMP_NUMB_BITS == 30
#define MP_BASES_CHARS_PER_LIMB_10      9
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x3b9aca00)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x12e0be82)
#define MP_BASES_NORMALIZATION_STEPS_10 2
#endif
#if GMP_NUMB_BITS == 32
#define MP_BASES_CHARS_PER_LIMB_10      9
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x3b9aca00)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x12e0be82)
#define MP_BASES_NORMALIZATION_STEPS_10 2
#endif
#if GMP_NUMB_BITS == 60
#define MP_BASES_CHARS_PER_LIMB_10      18
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0xde0b6b3a7640000)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x2725dd1d243aba0e)
#define MP_BASES_NORMALIZATION_STEPS_10 4
#endif
#if GMP_NUMB_BITS == 62
#define MP_BASES_CHARS_PER_LIMB_10      18
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0xde0b6b3a7640000)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x2725dd1d243aba0e)
#define MP_BASES_NORMALIZATION_STEPS_10 4
#endif
#if GMP_NUMB_BITS == 64
#define MP_BASES_CHARS_PER_LIMB_10      19
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x8ac7230489e80000)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0xd83c94fb6d2ac34a)
#define MP_BASES_NORMALIZATION_STEPS_10 0
#endif


/* For power of 2 bases this is exact.  For other bases the result is either
   exact or one too big.

   To be exact always it'd be necessary to examine all the limbs of the
   operand, since numbers like 100..000 and 99...999 generally differ only
   in the lowest limb.  It'd be possible to examine just a couple of high
   limbs to increase the probability of being exact, but that doesn't seem
   worth bothering with.  */

#define MPN_SIZEINBASE(result, ptr, size, base)                         \
  do {                                                                  \
    int       __lb_base, __cnt;                                         \
    mp_size_t __totbits;                                                \
                                                                        \
    ASSERT ((size) >= 0);                                               \
    ASSERT ((base) >= 2);                                               \
    ASSERT ((base) < numberof (mp_bases));                              \
                                                                        \
    /* Special case for X == 0. */                                      \
    if ((size) == 0)                                                    \
      (result) = 1;                                                     \
    else                                                                \
      {                                                                 \
        /* Calculate the total number of significant bits of X. */      \
        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
        __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);   \
                                                                        \
        if (POW2_P (base))                                              \
          {                                                             \
            __lb_base = mp_bases[base].big_base;                        \
            (result) = (__totbits + __lb_base - 1) / __lb_base;         \
          }                                                             \
        else                                                            \
          (result) = (size_t)                                           \
            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
      }                                                                 \
  } while (0)

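/* Worked example (editorial): for the value 2^40 stored in 2 limbs of 32
   bits with no nails, __totbits is 41.  In base 16 (__lb_base 4) the result
   is ceil(41/4) = 11 digits, exact since the base is a power of 2.  In base
   10, chars_per_bit_exactly is log(2)/log(10) ~= 0.30103, so the estimate
   is floor(41*0.30103)+1 = 13, which here happens to be exact too (2^40 is
   1099511627776, 13 digits).  */
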
/* eliminate mp_bases lookups for base==16 */
#define MPN_SIZEINBASE_16(result, ptr, size)                            \
  do {                                                                  \
    int       __cnt;                                                    \
    mp_size_t __totbits;                                                \
                                                                        \
    ASSERT ((size) >= 0);                                               \
                                                                        \
    /* Special case for X == 0. */                                      \
    if ((size) == 0)                                                    \
      (result) = 1;                                                     \
    else                                                                \
      {                                                                 \
        /* Calculate the total number of significant bits of X. */      \
        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
        __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);   \
        (result) = (__totbits + 4 - 1) / 4;                             \
      }                                                                 \
  } while (0)


#if HAVE_HOST_CPU_FAMILY_x86
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif

/* Use a library function for invert_limb, if available. */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
#define invert_limb(invxl,xl)  (invxl = mpn_invert_limb (xl))
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl)                   \
  do {                                          \
    mp_limb_t dummy;                            \
    ASSERT ((xl) != 0);                         \
    if (xl << 1 == 0)                           \
      invxl = ~(mp_limb_t) 0;                   \
    else                                        \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl);    \
  } while (0)
#endif

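/* Editorial note: invert_limb computes floor((B^2-1)/xl) - B for a
   normalized xl (high bit set), where B = 2^BITS_PER_MP_LIMB.  E.g. with
   32-bit limbs and xl = 0x80000000 the inverse is 0xFFFFFFFF, which is the
   case the "xl << 1 == 0" test special-cases since the general udiv_qrnnd
   path would overflow there.  */
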
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                  \
  do {                                                          \
    mp_limb_t _q, _ql, _r;                                      \
    mp_limb_t _xh, _xl;                                         \
    ASSERT ((d) != 0);                                          \
    umul_ppmm (_q, _ql, (nh), (di));                            \
    _q += (nh);  /* DI is 2**BITS_PER_MP_LIMB too small */      \
    umul_ppmm (_xh, _xl, _q, (d));                              \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                 \
    if (_xh != 0)                                               \
      {                                                         \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                  \
        _q += 1;                                                \
        if (_xh != 0)                                           \
          {                                                     \
            _r -= (d);                                          \
            _q += 1;                                            \
          }                                                     \
      }                                                         \
    if (_r >= (d))                                              \
      {                                                         \
        _r -= (d);                                              \
        _q += 1;                                                \
      }                                                         \
    (r) = _r;                                                   \
    (q) = _q;                                                   \
  } while (0)
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup)             \
  do {                                                                      \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                                   \
    mp_limb_t _xh, _xl;                                                     \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                             \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));              \
    _nadj = _n10 + (_n1 & (dnorm));                                         \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                    \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                              \
    _q1 = ~(_n2 + _xh);                                                     \
    umul_ppmm (_xh, _xl, _q1, d);                                           \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                                \
    _xh -= (d);                                                             \
    (r) = _xl + ((d) & _xh);                                                \
    (q) = _xh - _q1;                                                        \
  } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version to use.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di)                 \
  do {                                                              \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                           \
    mp_limb_t _xh, _xl;                                             \
    _n2 = (nh);                                                     \
    _n10 = (nl);                                                    \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));      \
    _nadj = _n10 + (_n1 & (d));                                     \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                            \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                      \
    _q1 = ~(_n2 + _xh);                                             \
    umul_ppmm (_xh, _xl, _q1, d);                                   \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                        \
    _xh -= (d);                                                     \
    (r) = _xl + ((d) & _xh);                                        \
    (q) = _xh - _q1;                                                \
  } while (0)


#define mpn_preinv_divrem_1  __MPN(preinv_divrem_1)
mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));


/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
   the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
   mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
   the few CISC chips where preinv is not good have defines saying so.  */
#ifndef USE_PREINV_DIVREM_1
#define USE_PREINV_DIVREM_1   1
#endif
#ifndef USE_PREINV_MOD_1
#define USE_PREINV_MOD_1      1
#endif

#if USE_PREINV_DIVREM_1
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
#else
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
  mpn_divrem_1 (qp, xsize, ap, size, d)
#endif

#if USE_PREINV_MOD_1
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
  mpn_preinv_mod_1 (src, size, divisor, inverse)
#else
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
  mpn_mod_1 (src, size, divisor)
#endif


#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
mp_limb_t mpn_mod_34lsub1 _PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;


/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
   plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
   mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
   modexact are faster at all sizes, so the defaults are 0.  Those CPUs
   where this is not right have a tuned threshold.  */
#ifndef DIVEXACT_1_THRESHOLD
#define DIVEXACT_1_THRESHOLD  0
#endif
#ifndef MODEXACT_1_ODD_THRESHOLD
#define MODEXACT_1_ODD_THRESHOLD  0
#endif

#define mpn_divexact_1 __MPN(divexact_1)
void    mpn_divexact_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
  do {                                                                        \
    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
    else                                                                      \
      {                                                                       \
        ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
        mpn_divexact_1 (dst, src, size, divisor);                             \
      }                                                                       \
  } while (0)

#define mpn_modexact_1c_odd  __MPN(modexact_1c_odd)
mp_limb_t mpn_modexact_1c_odd _PROTO ((mp_srcptr src, mp_size_t size,
                                       mp_limb_t divisor, mp_limb_t c)) __GMP_ATTRIBUTE_PURE;

#if HAVE_NATIVE_mpn_modexact_1_odd
#define mpn_modexact_1_odd   __MPN(modexact_1_odd)
mp_limb_t mpn_modexact_1_odd _PROTO ((mp_srcptr src, mp_size_t size,
                                      mp_limb_t divisor)) __GMP_ATTRIBUTE_PURE;
#else
#define mpn_modexact_1_odd(src,size,divisor) \
  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
#endif

#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)     \
  (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)     \
   ? mpn_modexact_1_odd (src, size, divisor)            \
   : mpn_mod_1 (src, size, divisor))


/* modlimb_invert() sets inv to the multiplicative inverse of n modulo
   2^BITS_PER_MP_LIMB, ie. satisfying inv*n == 1 mod 2^BITS_PER_MP_LIMB.
   n must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean "An algorithm for
   exact division" end of section 4 (reference in gmp.texi).

   Possible enhancement: Could use UHWtype until the last step, if half-size
   multiplies are faster (might help under _LONG_LONG_LIMB).

   Alternative: As noted in Granlund and Montgomery "Division by Invariant
   Integers using Multiplication" (reference in gmp.texi), n itself gives a
   3-bit inverse immediately, and could be used instead of a table lookup.
   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
   bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */

#define modlimb_invert_table  __gmp_modlimb_invert_table
__GMP_DECLSPEC extern const unsigned char  modlimb_invert_table[128];

#if BITS_PER_MP_LIMB <= 8
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB <= 16
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 32 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 32 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 64 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#endif /* 64 */
#endif /* 32 */
#endif /* 16 */
#endif /* 8 */

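/* Worked example (editorial): for n = 3 on 32-bit limbs the table lookup
   gives the 8-bit inverse 0xAB (3*0xAB = 0x201), and the doubling steps
   extend it to 0xAAAB then 0xAAAAAAAB, satisfying
   3 * 0xAAAAAAAB == 1 mod 2^32 -- the same value MODLIMB_INVERSE_3 below
   produces directly.  */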

/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
   0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
#define MODLIMB_INVERSE_3   ((GMP_NUMB_MAX / 3) * 2 + 1)


/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.

   It's not clear whether this is the best way to do this calculation.
   Anything congruent to -a would be fine for the one limb congruence
   tests.  */

#define NEG_MOD(r, a, d)                                        \
  do {                                                          \
    ASSERT ((d) != 0);                                          \
    ASSERT_LIMB (a);                                            \
    ASSERT_LIMB (d);                                            \
                                                                \
    if ((a) <= (d))                                             \
      {                                                         \
        /* small a is reasonably likely */                      \
        (r) = (d) - (a);                                        \
      }                                                         \
    else                                                        \
      {                                                         \
        unsigned   __twos;                                      \
        mp_limb_t  __dnorm;                                     \
        count_leading_zeros (__twos, d);                        \
        __twos -= GMP_NAIL_BITS;                                \
        __dnorm = (d) << __twos;                                \
        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);     \
      }                                                         \
                                                                \
    ASSERT_LIMB (r);                                            \
  } while (0)

/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)

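/* For instance (editorial): LOW_ZEROS_MASK(0x28) is 0x7, masking the three
   trailing zero bits of binary 101000, since n & -n isolates the lowest set
   bit (8 here) and subtracting 1 turns that into the mask below it;
   LOW_ZEROS_MASK(0) is -1, all ones.  */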

/* Set "p" to 1 if there's an odd number of 1 bits in "n", or to 0 if
   there's an even number.  */

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
#define ULONG_PARITY(p, n)              \
  do {                                  \
    char           __p;                 \
    unsigned long  __n = (n);           \
    __n ^= (__n >> 16);                 \
    asm ("xorb %h1, %b1\n"              \
         "setpo %0\n"                   \
         : "=qm" (__p), "=q" (__n)      \
         : "1" (__n));                  \
    (p) = __p;                          \
  } while (0)
#else
#define ULONG_PARITY(p, n)                      \
  do {                                          \
    unsigned long  __n = (n);                   \
    int  __p = 0;                               \
    do                                          \
      {                                         \
        __p ^= 0x96696996L >> (__n & 0x1F);     \
        __n >>= 5;                              \
      }                                         \
    while (__n != 0);                           \
                                                \
    (p) = __p & 1;                              \
  } while (0)
#endif

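/* Editorial note on the generic version: 0x96696996 is a 32-entry parity
   table packed into one constant -- bit i of 0x96696996 is the parity of i
   -- so each pass folds five more bits of __n into __p, and the final mask
   keeps just the accumulated parity bit.  */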

/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
   kernel with xchgb instead of rorw), but this is not done here, because
   i386 means generic x86 and mixing word and dword operations will cause
   partial register stalls on P6 chips.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
  && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)                    \
  do {                                          \
    asm ("bswap %0" : "=r" (dst) : "0" (src));  \
  } while (0)
#endif /* x86 */

#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && defined (__ia64) && BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src)                                    \
  do {                                                          \
    asm ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src));        \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB)
#if BITS_PER_MP_LIMB == 8
#define BSWAP_LIMB(dst, src)            \
  do { (dst) = (src); } while (0)
#endif
#if BITS_PER_MP_LIMB == 16
#define BSWAP_LIMB(dst, src)                    \
  do {                                          \
    (dst) = ((src) << 8) + ((src) >> 8);        \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    (dst) =                             \
      ((src) << 24)                     \
      + (((src) & 0xFF00) << 8)         \
      + (((src) >> 8) & 0xFF00)         \
      + ((src) >> 24);                  \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    (dst) =                             \
      ((src) << 56)                     \
      + (((src) & 0xFF00) << 40)        \
      + (((src) & 0xFF0000) << 24)      \
      + (((src) & 0xFF000000) << 8)     \
      + (((src) >> 8) & 0xFF000000)     \
      + (((src) >> 24) & 0xFF0000)      \
      + (((src) >> 40) & 0xFF00)        \
      + ((src) >> 56);                  \
  } while (0)
#endif
#endif

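/* E.g. (editorial): with 32-bit limbs, BSWAP_LIMB applied to 0x12345678
   yields 0x78563412.  */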

/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
   those we know are fast.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN     \
  && (HAVE_HOST_CPU_powerpc604                          \
      || HAVE_HOST_CPU_powerpc604e                      \
      || HAVE_HOST_CPU_powerpc750                       \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src)     \
  do {                                  \
    mp_srcptr  __blf_src = (src);       \
    mp_limb_t  __limb;                  \
    __asm__ ("lwbrx %0, 0, %1"          \
             : "=r" (__limb)            \
             : "r" (__blf_src),         \
               "m" (*__blf_src));       \
    (limb) = __limb;                    \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_FETCH)
#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
#endif


/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
   know are fast.  FIXME: Is this necessary?  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN     \
  && (HAVE_HOST_CPU_powerpc604                          \
      || HAVE_HOST_CPU_powerpc604e                      \
      || HAVE_HOST_CPU_powerpc750                       \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb)     \
  do {                                  \
    mp_ptr     __dst = (dst);           \
    mp_limb_t  __limb = (limb);         \
    __asm__ ("stwbrx %1, 0, %2"         \
             : "=m" (*__dst)            \
             : "r" (__limb),            \
               "r" (__dst));            \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_STORE)
#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
#endif


/* Byte swap limbs from {src,size} and store at {dst,size}. */
#define MPN_BSWAP(dst, src, size)                       \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_srcptr  __src = (src);                           \
    mp_size_t  __size = (size);                         \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        BSWAP_LIMB_FETCH (*__dst, __src);               \
        __dst++;                                        \
        __src++;                                        \
      }                                                 \
  } while (0)

/* Byte swap limbs from {src,size} and store in reverse order at
   {dst,size}. */
#define MPN_BSWAP_REVERSE(dst, src, size)               \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        BSWAP_LIMB_FETCH (*__dst, __src);               \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)


/* No processor claiming to be SPARC v9 compliant seems to
   implement the POPC instruction.  Disable pattern for now.  */
#if 0
#if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
#define popc_limb(result, input)                        \
  do {                                                  \
    DItype __res;                                       \
    asm ("popc %1,%0" : "=r" (result) : "rI" (input));  \
  } while (0)
#endif
#endif

/* Cool population count of an mp_limb_t.
   You have to figure out how this works; we won't tell you!

   The constants could also be expressed as:
     0xAA... = [2^(N+1) / 3] = [(2^N-1)/3*2]
     0x33... = [2^N / 5]     = [(2^N-1)/5]
     0x0f... = [2^N / 17]    = [(2^N-1)/17]
   (N is BITS_PER_MP_LIMB, [] denotes truncation.) */

#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 64
#define popc_limb(result, input)                                \
  do {                                                          \
    mp_limb_t  __x = (input);                                   \
    __x -= (__x & CNST_LIMB(0xaaaaaaaaaaaaaaaa)) >> 1;          \
    __x = ((__x >> 2) & CNST_LIMB(0x3333333333333333))          \
      +    (__x       & CNST_LIMB(0x3333333333333333));         \
    __x = ((__x >> 4) + __x) & CNST_LIMB(0x0f0f0f0f0f0f0f0f);   \
    __x = ((__x >> 8) + __x);                                   \
    __x = ((__x >> 16) + __x);                                  \
    __x = ((__x >> 32) + __x) & 0xff;                           \
    (result) = __x;                                             \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 32
#define popc_limb(result, input)                                \
  do {                                                          \
    mp_limb_t  __x = (input);                                   \
    __x -= (__x & 0xaaaaaaaaL) >> 1;                            \
    __x = ((__x >> 2) & 0x33333333L) + (__x & 0x33333333L);     \
    __x = ((__x >> 4) + __x) & 0x0f0f0f0fL;                     \
    __x = ((__x >> 8) + __x);                                   \
    __x = ((__x >> 16) + __x) & 0xff;                           \
    (result) = __x;                                             \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 16
#define popc_limb(result, input)                        \
  do {                                                  \
    mp_limb_t  __x = (input);                           \
    __x -= (__x & 0xaaaa) >> 1;                         \
    __x = ((__x >> 2) & 0x3333) + (__x & 0x3333);       \
    __x = ((__x >> 4) + __x) & 0x0f0f;                  \
    __x = ((__x >> 8) + __x) & 0xff;                    \
    (result) = __x;                                     \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 8
#define popc_limb(result, input)                        \
  do {                                                  \
    mp_limb_t  __x = (input);                           \
    __x -= (__x & 0xaa) >> 1;                           \
    __x = ((__x >> 2) & 0x33) + (__x & 0x33);           \
    __x = ((__x >> 4) + __x) & 0xf;                     \
    (result) = __x;                                     \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 4
#define popc_limb(result, input)                                              \
  do {                                                                        \
    mp_limb_t  __x = (input);                                                 \
    __x = (__x & 1) + ((__x >> 1) & 1) + ((__x >> 2) & 1) + ((__x >> 3) & 1); \
    (result) = __x;                                                           \
  } while (0)
#endif

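/* Worked example (editorial), using the 8-bit flavour on 0xB5 (binary
   10110101, five 1 bits): the first step leaves per-pair counts 0x65
   (01 10 01 01), the second per-nibble counts 0x32 (3 and 2), and the
   final fold gives 5.  The wider versions are the same tree with more
   folding steps.  */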

/* Define stuff for longlong.h.  */
#if HAVE_ATTRIBUTE_MODE
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if HAVE_LONG_LONG
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB

/* Define ieee_double_extract and _GMP_IEEE_FLOATS.  */

#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
/* Special case for little endian ARM since floats remain in big-endian.  */
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#else
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__)     \
 || defined (__alpha)                                           \
 || defined (__clipper__)                                       \
 || defined (__cris)                                            \
 || defined (__i386__)                                          \
 || defined (__x86_64__)                                        \
 || defined (__i860__)                                          \
 || defined (__i960__)                                          \
 || defined (__ia64) && ! defined (__hpux)                      \
 || defined (MIPSEL) || defined (_MIPSEL)                       \
 || defined (__ns32000__)                                       \
 || defined (__WINNT) || defined (_WIN32)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive.  */
#if defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__) \
 || defined (mc68020)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      /* "int" might be only 16 bits, so use "long" */
      unsigned long sig:1;
      unsigned long exp:11;
      unsigned long manh:20;
      unsigned long manl:32;
    } s;
  double d;
};
#else
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__)           \
 || defined (__a29k__) || defined (_AM29K)                      \
 || defined (__arm__)                                           \
 || (defined (__convex__) && defined (_IEEE_FLOAT_))            \
 || defined (_CRAYMPP) || defined (_CRAYIEEE)                   \
 || defined (__i370__) || defined (__mvs__)                     \
 || defined (__m88000__)                                        \
 || defined (MIPSEB) || defined (_MIPSEB)                       \
 || defined (__hppa) || defined (__hppa__)                      \
 || defined (__pyr__)                                           \
 || defined (__ibm032__)                                        \
 || defined (_IBMR2) || defined (_ARCH_PPC)                     \
 || defined (__s390__)                                          \
 || defined (__sh__)                                            \
 || defined (__sparc) || defined (sparc) || defined (__sparc__) \
 || defined (__sparc_v9) || defined (__sparc_v9__)              \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif
#endif

/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
/* Maximum number of limbs it will take to store any `double'.
   We assume doubles have 53 mantissa bits.  */
#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1)

double __gmp_scale2 _PROTO ((double, int)) ATTRIBUTE_CONST;
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
void __gmp_exception _PROTO ((int)) ATTRIBUTE_NORETURN;
void __gmp_divide_by_zero _PROTO ((void)) ATTRIBUTE_NORETURN;
void __gmp_sqrt_of_negative _PROTO ((void)) ATTRIBUTE_NORETURN;
#define GMP_ERROR(code)   __gmp_exception (code)
#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()

#if defined _LONG_LONG_LIMB
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
#endif
#else /* not _LONG_LONG_LIMB */
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
#endif
#endif /* _LONG_LONG_LIMB */

/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
#if GMP_NUMB_BITS == 2
#define PP 0x3                                  /* 3 */
#define PP_FIRST_OMITTED 5
#endif
#if GMP_NUMB_BITS == 4
#define PP 0xF                                  /* 3 x 5 */
#define PP_FIRST_OMITTED 7
#endif
#if GMP_NUMB_BITS == 8
#define PP 0x69                                 /* 3 x 5 x 7 */
#define PP_FIRST_OMITTED 11
#endif
#if GMP_NUMB_BITS == 16
#define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
#define PP_FIRST_OMITTED 17
#endif
#if GMP_NUMB_BITS == 32
#define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_FIRST_OMITTED 31
#endif
#if GMP_NUMB_BITS == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_FIRST_OMITTED 59
#endif
#ifndef PP_FIRST_OMITTED
#define PP_FIRST_OMITTED 3
#endif


/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
   used to ensure the expressions are "int"s even if a and/or b might be
   other types.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs.  */

/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a)   ((a) == 1)

/* (a/0), with a given by low and size;
   is 1 if a=+/-1, 0 otherwise */
#define JACOBI_LS0(alow,asize) \
  (((asize) == 1 || (asize) == -1) && (alow) == 1)

/* (a/0), with a an mpz_t;
   fetch of low limb always valid, even if size is zero */
#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))

/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0U(b)   ((b) == 1)

/* (0/b), with b signed; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)

/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0LS(blow,bsize) \
  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)

/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((int) (result_bit1) & 2))
2664 | |
---|
2665 | /* (2/b), with b unsigned and odd; |
---|
2666 | is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and |
---|
2667 | hence obtained from (b>>1)^b */ |
---|
2668 | #define JACOBI_TWO_U_BIT1(b) \ |
---|
2669 | ((int) (((b) >> 1) ^ (b))) |

/* (2/b)^twos, with b unsigned and odd, as a bit1 value */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd, as a +1 or -1 value */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (-1/b), with b odd (signed or unsigned);
   is (-1)^((b-1)/2) */
#define JACOBI_N1B_BIT1(b) \
  ((int) (b))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1/b) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))

/* (a/b) effect due to sign of b: signed/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SS_BIT1(a, b) \
  ((((a)<0) & ((b)<0)) << 1)

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))

/* (a/b) effect due to sign of b: mpz/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1 (b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))


/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   unsigned.  modexact_1_odd effectively calculates -a mod b, and
   result_bit1 is adjusted for the factor of -1.

   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   sometimes on a_size-1 means that if GMP_NUMB_BITS is odd we can't know
   what factor to introduce into result_bit1, so for that case mpn_mod_1
   is used unconditionally.

   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   or not skip a divide step, or something. */

#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
  do {                                                                  \
    mp_srcptr  __a_ptr  = (a_ptr);                                      \
    mp_size_t  __a_size = (a_size);                                     \
    mp_limb_t  __b      = (b);                                          \
                                                                        \
    ASSERT (__a_size >= 1);                                             \
    ASSERT (__b & 1);                                                   \
                                                                        \
    if ((GMP_NUMB_BITS % 2) != 0                                        \
        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))        \
      {                                                                 \
        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                         \
        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);          \
      }                                                                 \
  } while (0)
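
/* Call-pattern sketch (illustrative only, not compiled): a caller
   accumulates sign flips in result_bit1, reduces the multi-limb operand
   {a_ptr,a_size} to a single limb, then finishes with the one-limb
   mpn_jacobi_base step mentioned in the comments above.  b_limb is an
   assumed odd limb greater than 1.  */
#if 0
  int        result_bit1 = 0;
  mp_limb_t  a_rem;
  JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, a_rem, a_ptr, a_size, b_limb);
  if (a_rem == 0)
    return 0;                   /* (0/b) = 0 for b > 1 */
  return mpn_jacobi_base (a_rem, b_limb, result_bit1);
#endif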


/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
   hence giving back the user's size in bits rounded up.  Notice that
   converting prec->bits->prec gives an unchanged value. */
#define __GMPF_BITS_TO_PREC(n) \
  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
#define __GMPF_PREC_TO_BITS(n) \
  ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
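
/* Worked example (illustrative): with GMP_NUMB_BITS==32, a request for 64
   bits gives prec = (64 + 63) / 32 = 3 limbs (two whole limbs plus the
   extra one), and __GMPF_PREC_TO_BITS(3) = 3*32 - 32 = 64 bits back again,
   so the prec->bits->prec round trip is indeed unchanged.  */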

extern mp_size_t __gmp_default_fp_limb_precision;


/* Set n to the number of significant digits an mpf with the given _mp_prec
   field can hold in the given base.  This is a rounded-up value, designed
   to ensure there's enough digits to reproduce all the guaranteed part of
   the value.

   There are prec many limbs, but the high one might hold only a "1", so
   forget it and just count prec-1 limbs into chars.  +1 rounds that
   upwards, and a further +1 is because the limbs usually won't fall on
   digit boundaries.

   FIXME: If base is a power of 2 and the bits per digit divides
   BITS_PER_MP_LIMB then the +2 is unnecessary.  This is always the case
   for base==2, and for base==16 with the current 32 or 64 bit limb sizes. */

#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
  do {                                                                  \
    ASSERT (base >= 2 && base < numberof (mp_bases));                   \
    (n) = 2 + (size_t) ((((prec) - 1) * BITS_PER_MP_LIMB)               \
                        * mp_bases[(base)].chars_per_bit_exactly);      \
  } while (0)
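
/* Worked example (illustrative): with 64-bit limbs, base 10 and prec==3,
   (prec-1)*BITS_PER_MP_LIMB = 128 bits, and chars_per_bit_exactly for
   base 10 is log10(2) ~= 0.30103, so n = 2 + (size_t) 38.5 = 40 digits.  */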


#define DOPRNT_CONV_FIXED        1
#define DOPRNT_CONV_SCIENTIFIC   2
#define DOPRNT_CONV_GENERAL      3

#define DOPRNT_JUSTIFY_NONE      0
#define DOPRNT_JUSTIFY_LEFT      1
#define DOPRNT_JUSTIFY_RIGHT     2
#define DOPRNT_JUSTIFY_INTERNAL  3

#define DOPRNT_SHOWBASE_YES      1
#define DOPRNT_SHOWBASE_NO       2
#define DOPRNT_SHOWBASE_NONZERO  3

struct doprnt_params_t {
  int         base;          /* negative for upper case */
  int         conv;          /* choices above */
  const char  *expfmt;       /* exponent format */
  int         exptimes4;     /* exponent multiply by 4 */
  char        fill;          /* character */
  int         justify;       /* choices above */
  int         prec;          /* prec field, or -1 for all digits */
  int         showbase;      /* choices above */
  int         showpoint;     /* if radix point always shown */
  int         showtrailing;  /* if trailing zeros wanted */
  char        sign;          /* '+', ' ', or '\0' */
  int         width;         /* width field */
};

#if _GMP_H_HAVE_VA_LIST

typedef int (*doprnt_format_t) _PROTO ((void *data, const char *fmt, va_list ap));
typedef int (*doprnt_memory_t) _PROTO ((void *data, const char *str, size_t len));
typedef int (*doprnt_reps_t)   _PROTO ((void *data, int c, int reps));
typedef int (*doprnt_final_t)  _PROTO ((void *data));

struct doprnt_funs_t {
  doprnt_format_t  format;
  doprnt_memory_t  memory;
  doprnt_reps_t    reps;
  doprnt_final_t   final;   /* NULL if not required */
};

extern const struct doprnt_funs_t  __gmp_fprintf_funs;
extern const struct doprnt_funs_t  __gmp_sprintf_funs;
extern const struct doprnt_funs_t  __gmp_snprintf_funs;
extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
extern const struct doprnt_funs_t  __gmp_ostream_funs;

/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
   "size" of these have been written.  "alloc > size" is maintained, so
   there's room to store a '\0' at the end.  "result" is where the
   application wants the final block pointer. */
struct gmp_asprintf_t {
  char    **result;
  char    *buf;
  size_t  size;
  size_t  alloc;
};

#define GMP_ASPRINTF_T_INIT(d, output)                          \
  do {                                                          \
    (d).result = (output);                                      \
    (d).alloc = 256;                                            \
    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
    (d).size = 0;                                               \
  } while (0)

/* If a realloc is necessary, use twice the size actually required, so as to
   avoid repeated small reallocs. */
#define GMP_ASPRINTF_T_NEED(d, n)                                       \
  do {                                                                  \
    size_t  alloc, newsize, newalloc;                                   \
    ASSERT ((d)->alloc >= (d)->size + 1);                               \
                                                                        \
    alloc = (d)->alloc;                                                 \
    newsize = (d)->size + (n);                                          \
    if (alloc <= newsize)                                               \
      {                                                                 \
        newalloc = 2*newsize;                                           \
        (d)->alloc = newalloc;                                          \
        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
                                               alloc, newalloc, char);  \
      }                                                                 \
  } while (0)

__GMP_DECLSPEC int __gmp_asprintf_memory _PROTO ((struct gmp_asprintf_t *d, const char *str, size_t len));
__GMP_DECLSPEC int __gmp_asprintf_reps _PROTO ((struct gmp_asprintf_t *d, int c, int reps));
__GMP_DECLSPEC int __gmp_asprintf_final _PROTO ((struct gmp_asprintf_t *d));
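
/* Usage sketch (illustrative only, not compiled): build a string
   incrementally, then hand the block back through "result".  This assumes,
   per the struct comment above, that __gmp_asprintf_final nul-terminates
   the buffer and stores it to *result.  */
#if 0
  char  *result;
  struct gmp_asprintf_t  d;
  GMP_ASPRINTF_T_INIT (d, &result);
  __gmp_asprintf_memory (&d, "hello ", 6);
  __gmp_asprintf_reps (&d, '!', 3);          /* append "!!!" */
  __gmp_asprintf_final (&d);                 /* result now "hello !!!" */
  (*__gmp_free_func) (result, strlen (result) + 1);
#endif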

/* buf is where to write the next output, and size is how much space is left
   there.  If the application passed size==0 then that's what we'll have
   here, and nothing at all should be written. */
struct gmp_snprintf_t {
  char    *buf;
  size_t  size;
};

/* Add the bytes printed by the call to the total retval, or bail out on an
   error. */
#define DOPRNT_ACCUMULATE(call)         \
  do {                                  \
    int  __ret;                         \
    __ret = call;                       \
    if (__ret == -1)                    \
      goto error;                       \
    retval += __ret;                    \
  } while (0)
#define DOPRNT_ACCUMULATE_FUN(fun, params)      \
  do {                                          \
    ASSERT ((fun) != NULL);                     \
    DOPRNT_ACCUMULATE ((*(fun)) params);        \
  } while (0)

#define DOPRNT_FORMAT(fmt, ap)                          \
  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
#define DOPRNT_MEMORY(ptr, len)                         \
  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
#define DOPRNT_REPS(c, n)                               \
  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))

#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))

#define DOPRNT_REPS_MAYBE(c, n)         \
  do {                                  \
    if ((n) != 0)                       \
      DOPRNT_REPS (c, n);               \
  } while (0)
#define DOPRNT_MEMORY_MAYBE(ptr, len)   \
  do {                                  \
    if ((len) != 0)                     \
      DOPRNT_MEMORY (ptr, len);         \
  } while (0)
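
/* Usage sketch (illustrative only, not compiled): these macros expect
   locals "funs", "data" and "retval" plus a label "error" in the enclosing
   function, as in __gmp_doprnt-style output drivers.  output_example is a
   hypothetical driver, not a library function.  */
#if 0
static int
output_example (const struct doprnt_funs_t *funs, void *data)
{
  int  retval = 0;
  DOPRNT_STRING ("value: ");    /* jumps to error on a -1 return */
  DOPRNT_REPS ('0', 4);         /* write four '0' characters */
  return retval;
 error:
  return -1;
}
#endif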

__GMP_DECLSPEC int __gmp_doprnt _PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
__GMP_DECLSPEC int __gmp_doprnt_integer _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
__GMP_DECLSPEC int __gmp_doprnt_mpf _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, mpf_srcptr));
/* FIXME: should this have __GMP_DECLSPEC like the declarations above? */
int __gmp_replacement_vsnprintf _PROTO ((char *, size_t, const char *, va_list));
#endif /* _GMP_H_HAVE_VA_LIST */


typedef int (*gmp_doscan_scan_t)   _PROTO ((void *, const char *, ...));
typedef void *(*gmp_doscan_step_t) _PROTO ((void *, int));
typedef int (*gmp_doscan_get_t)    _PROTO ((void *));
typedef int (*gmp_doscan_unget_t)  _PROTO ((int, void *));

struct gmp_doscan_funs_t {
  gmp_doscan_scan_t   scan;
  gmp_doscan_step_t   step;
  gmp_doscan_get_t    get;
  gmp_doscan_unget_t  unget;
};
extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;

#if _GMP_H_HAVE_VA_LIST
int __gmp_doscan _PROTO ((const struct gmp_doscan_funs_t *, void *,
                          const char *, va_list));
#endif


/* For testing and debugging. */
#define MPZ_CHECK_FORMAT(z)                                     \
  do {                                                          \
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
  } while (0)

#define MPQ_CHECK_FORMAT(q)                             \
  do {                                                  \
    MPZ_CHECK_FORMAT (mpq_numref (q));                  \
    MPZ_CHECK_FORMAT (mpq_denref (q));                  \
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
                                                        \
    if (SIZ(mpq_numref(q)) == 0)                        \
      {                                                 \
        /* should have zero as 0/1 */                   \
        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
                       && PTR(mpq_denref(q))[0] == 1);  \
      }                                                 \
    else                                                \
      {                                                 \
        /* should have no common factors */             \
        mpz_t  g;                                       \
        mpz_init (g);                                   \
        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
        mpz_clear (g);                                  \
      }                                                 \
  } while (0)

#define MPF_CHECK_FORMAT(f)                             \
  do {                                                  \
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
    if (SIZ(f) == 0)                                    \
      ASSERT_ALWAYS (EXP(f) == 0);                      \
    if (SIZ(f) != 0)                                    \
      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
  } while (0)
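
/* Usage sketch (illustrative only, not compiled): a test might verify the
   canonical form of a result after each operation under test.  */
#if 0
  mpq_t  q;
  mpq_init (q);
  mpq_set_si (q, 4, 6);
  mpq_canonicalize (q);    /* reduce to 2/3 */
  MPQ_CHECK_FORMAT (q);    /* aborts if num and den share a factor */
  mpq_clear (q);
#endif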


/* Shrink the recorded allocation to exactly the current size, so that any
   subsequent growth provokes a reallocation (for exercising realloc paths
   in tests). */
#define MPZ_PROVOKE_REALLOC(z) \
  do { ALLOC(z) = ABSIZ(z); } while (0)


#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build. */

extern mp_size_t  mul_threshold[];
extern mp_size_t  fft_modf_mul_threshold;
extern mp_size_t  sqr_threshold[];
extern mp_size_t  fft_modf_sqr_threshold;
extern mp_size_t  sb_preinv_threshold[];
extern mp_size_t  dc_threshold[];
extern mp_size_t  powm_threshold[];
extern mp_size_t  gcd_accel_threshold[];
extern mp_size_t  gcdext_threshold[];
extern mp_size_t  divrem_1_norm_threshold[];
extern mp_size_t  divrem_1_unnorm_threshold[];
extern mp_size_t  divrem_2_threshold[];
extern mp_size_t  mod_1_norm_threshold[];
extern mp_size_t  mod_1_unnorm_threshold[];
extern mp_size_t  get_str_basecase_threshold[];
extern mp_size_t  get_str_precompute_threshold[];

#undef MUL_KARATSUBA_THRESHOLD
#undef MUL_TOOM3_THRESHOLD
#undef MUL_FFT_TABLE
#undef MUL_FFT_THRESHOLD
#undef MUL_FFT_MODF_THRESHOLD
#undef SQR_BASECASE_THRESHOLD
#undef SQR_KARATSUBA_THRESHOLD
#undef SQR_TOOM3_THRESHOLD
#undef SQR_FFT_TABLE
#undef SQR_FFT_THRESHOLD
#undef SQR_FFT_MODF_THRESHOLD
#undef DIV_DC_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef GET_STR_DC_THRESHOLD
#undef GET_STR_PRECOMPUTE_THRESHOLD

#define MUL_KARATSUBA_THRESHOLD      mul_threshold[0]
#define MUL_TOOM3_THRESHOLD          mul_threshold[1]
#define MUL_FFT_TABLE                { 0 }
#define MUL_FFT_THRESHOLD            mul_threshold[2]
#define MUL_FFT_MODF_THRESHOLD       fft_modf_mul_threshold
#define SQR_BASECASE_THRESHOLD       sqr_threshold[0]
#define SQR_KARATSUBA_THRESHOLD      sqr_threshold[1]
#define SQR_TOOM3_THRESHOLD          sqr_threshold[2]
#define SQR_FFT_TABLE                { 0 }
#define SQR_FFT_THRESHOLD            sqr_threshold[3]
#define SQR_FFT_MODF_THRESHOLD       fft_modf_sqr_threshold
#define DIV_DC_THRESHOLD             dc_threshold[0]
#define POWM_THRESHOLD               powm_threshold[0]
#define GCD_ACCEL_THRESHOLD          gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD             gcdext_threshold[0]
#define DIVREM_1_NORM_THRESHOLD      divrem_1_norm_threshold[0]
#define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold[0]
#define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold[0]
#define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold[0]
#define GET_STR_DC_THRESHOLD         get_str_basecase_threshold[0]
#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold[0]

#if ! UDIV_PREINV_ALWAYS
#undef DIV_SB_PREINV_THRESHOLD
#undef DIVREM_2_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD  sb_preinv_threshold[0]
#define DIVREM_2_THRESHOLD       divrem_2_threshold[0]
#endif

/* Sizes the tune program tests up to, used in a couple of recompilations. */
#define SQR_KARATSUBA_MAX_GENERIC  200
#define MUL_TOOM3_THRESHOLD_LIMIT  700
#define GET_STR_THRESHOLD_LIMIT    500

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#if TUNE_PROGRAM_BUILD_SQR
#undef  SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD  SQR_KARATSUBA_MAX_GENERIC
#endif

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif


#ifdef __cplusplus

/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown. */
class gmp_allocated_string {
 public:
  char *str;
  gmp_allocated_string(char *arg) { str = arg; }
  ~gmp_allocated_string() { (*__gmp_free_func) (str, strlen(str)+1); }
};
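
/* Usage sketch (illustrative only, not compiled): mpz_get_str with a NULL
   buffer returns a block from __gmp_allocate_func, which this wrapper frees
   when the scope exits, even on an exception.  Assumes z is an initialized
   mpz_t and <iostream> is available.  */
#if 0
  gmp_allocated_string  s (mpz_get_str (NULL, 10, z));
  std::cout << s.str;
  /* s's destructor frees the string here */
#endif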

int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;

#endif /* __cplusplus */

#endif /* __GMP_IMPL_H__ */