/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

/* On Cray vector systems "short" and "unsigned short" might not be the same
   number of bits, making the SHRT_MAX defaults below fail.  (This depends
   on compiler options.)  Instead use limits.h.  */
#if defined _CRAY
#include <limits.h>
#endif

#if ! __GMP_WITHIN_CONFIGURE
#include "config.h"
#include "gmp-mparam.h"
#endif

#ifdef __cplusplus
#include <string>
#endif

/* Might search and replace _PROTO to __GMP_PROTO internally one day, to
   avoid two names for one thing, but no hurry for that.  */
#define _PROTO(x) __GMP_PROTO(x)

/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA, and HAVE_ALLOCA will
   be set up appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h.  */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif


/* "const" basically means a function does nothing but examine its arguments
   and give a return value; it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  */
#if HAVE_ATTRIBUTE_CONST
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif

#ifdef _CRAY
#define CRAY_Pragma(str)  _Pragma (str)
#else
#define CRAY_Pragma(str)
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
/* The declaration of __i comes first for the sake of pre-C99 compilers,
   and "n" is parenthesized since it's a macro argument.  */
#define memset(p, c, n)                 \
  do {                                  \
    int  __i;                           \
    ASSERT ((n) >= 0);                  \
    for (__i = 0; __i < (n); __i++)     \
      (p)[__i] = (c);                   \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL (marker);
          TMP_MARK (marker);
          ptr = TMP_ALLOC (bytes);
          TMP_FREE (marker);

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE isn't needed when a TMP_MARK was
   made but no TMP_ALLOCs followed.

   The name "marker" isn't used by the malloc-reentrant and debug methods,
   instead they hardcode a name which TMP_ALLOC will know.  For that reason
   two TMP_DECLs are not allowed, unless one is in a nested "{ }" block, and
   in that case TMP_MARK, TMP_ALLOC and TMP_FREE will refer to the TMP_DECL
   which is in scope, irrespective of the marker name given.  */
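
/* An illustrative sketch, not part of the original header: the typical
   pattern for the TMP scheme in a function body.  The function name foo
   is hypothetical, and n is assumed >= 1 since TMP_ALLOC(0) is not
   allowed.

       static void
       foo (mp_size_t n)
       {
         mp_ptr  tp;
         TMP_DECL (marker);      <-- last in the declarations, as noted

         TMP_MARK (marker);
         tp = (mp_ptr) TMP_ALLOC (n * sizeof (mp_limb_t));
         ... use {tp,n} ...
         TMP_FREE (marker);
       }
*/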

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.  */
#define ROUND_UP_MULTIPLE(a,m)  ((a) + (-(a))%(m))
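
/* For example, with a=13 and m=8, (-(a))%(m) is 3 in unsigned arithmetic,
   so ROUND_UP_MULTIPLE(13,8) is 16; with a=16, (-(a))%(m) is 0 and the
   value is unchanged.  (Illustration, not from the original header.)  */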

#if WANT_TMP_ALLOCA
/* Each TMP_ALLOC is simply an alloca(), and nothing else is needed.
   This is the preferred method.  */
#define TMP_DECL(m)
#define TMP_ALLOC(x)  alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif

#if WANT_TMP_REENTRANT
/* See tal-reent.c for some comments. */
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
void *__gmp_tmp_reentrant_alloc _PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
void  __gmp_tmp_reentrant_free _PROTO ((struct tmp_reentrant_t *));
#define TMP_DECL(marker)   struct tmp_reentrant_t *__tmp_marker
/* don't demand NULL, just cast a zero */
#define TMP_MARK(marker) \
  do { __tmp_marker = (struct tmp_reentrant_t *) 0; } while (0)
#define TMP_ALLOC(size)    __gmp_tmp_reentrant_alloc (&__tmp_marker, size)
#define TMP_FREE(marker)   __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
/* See tal-notreent.c for some comments. */
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
typedef struct tmp_marker tmp_marker;
void *__gmp_tmp_alloc _PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
void __gmp_tmp_mark _PROTO ((tmp_marker *));
void __gmp_tmp_free _PROTO ((tmp_marker *));
#define TMP_DECL(marker)  tmp_marker marker
/* gcc recognises "(-(8*n))%8" or the like is always zero, which means the
   rounding up is a noop for allocs of whole limbs. */
#define TMP_ALLOC(size) \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (size), __TMP_ALIGN))
#define TMP_MARK(marker)  __gmp_tmp_mark (&marker)
#define TMP_FREE(marker)  __gmp_tmp_free (&marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  char                      *block;
  size_t                    size;
};
void  __gmp_tmp_debug_mark  _PROTO ((const char *, int, struct tmp_debug_t **,
                                     struct tmp_debug_t *,
                                     const char *, const char *));
void *__gmp_tmp_debug_alloc _PROTO ((const char *, int, int,
                                     struct tmp_debug_t **, const char *,
                                     size_t)) ATTRIBUTE_MALLOC;
void  __gmp_tmp_debug_free  _PROTO ((const char *, int, int,
                                     struct tmp_debug_t **,
                                     const char *, const char *));
#if HAVE_STRINGIZE
#define TMP_DECL(marker) TMP_DECL_NAME(marker, #marker)
#define TMP_MARK(marker) TMP_MARK_NAME(marker, #marker)
#define TMP_FREE(marker) TMP_FREE_NAME(marker, #marker)
#else
#define TMP_DECL(marker) TMP_DECL_NAME(marker, "marker")
#define TMP_MARK(marker) TMP_MARK_NAME(marker, "marker")
#define TMP_FREE(marker) TMP_FREE_NAME(marker, "marker")
#endif
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_ALLOC(size)                                         \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,              \
                         __tmp_marker_inscope,                  \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#if WANT_TMP_DEBUG
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)   \
  do {                                          \
    (xp) = TMP_ALLOC_LIMBS (xsize);             \
    (yp) = TMP_ALLOC_LIMBS (ysize);             \
  } while (0)
#else
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)   \
  do {                                          \
    (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
    (yp) = (xp) + (xsize);                      \
  } while (0)
#endif


/* From gmp.h, nicer names for internal use. */
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)

#define ABS(x)   ((x) >= 0 ? (x) : -(x))
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define LIMBS(x)  ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)

/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
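
/* For instance POW2_P(16) is 1, and POW2_P(12) is 0 since 12&11 is 8;
   POW2_P(0) is also 1, as cautioned above.  (Illustration, not from the
   original header.)  */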


/* Might be already defined by gmp-mparam.h, otherwise use what's in gmp.h. */
#ifndef BITS_PER_MP_LIMB
#define BITS_PER_MP_LIMB  __GMP_BITS_PER_MP_LIMB
#endif


/* The "short" defines are a bit different because shorts are promoted to
   ints by ~ or >> etc.  */

#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
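
/* For example, with a 32-bit unsigned long, ULONG_HIGHBIT comes out as
   0xFFFFFFFF ^ 0x7FFFFFFF = 0x80000000, the top bit alone.  (Illustration,
   not from the original header.)  */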

#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef DIV_SB_PREINV_THRESHOLD
#undef DIV_DC_THRESHOLD
#undef POWM_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD      MP_SIZE_T_MAX
#define DIV_DC_THRESHOLD             50
#define POWM_THRESHOLD               0

#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#define GCD_ACCEL_THRESHOLD          3
#define GCDEXT_THRESHOLD             20

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef USE_PREINV_MOD_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#undef MODEXACT_1_ODD_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD      MP_SIZE_T_MAX  /* preinv always */
#define DIVREM_1_UNNORM_THRESHOLD    MP_SIZE_T_MAX  /* always */
#define MOD_1_NORM_THRESHOLD         MP_SIZE_T_MAX  /* always */
#define MOD_1_UNNORM_THRESHOLD       MP_SIZE_T_MAX  /* always */
#define USE_PREINV_DIVREM_1          0              /* preinv never */
#define USE_PREINV_MOD_1             0              /* preinv never */
#define DIVREM_2_THRESHOLD           MP_SIZE_T_MAX  /* preinv never */
#define DIVEXACT_1_THRESHOLD         MP_SIZE_T_MAX  /* always */
#define MODEXACT_1_ODD_THRESHOLD     MP_SIZE_T_MAX  /* always */

#undef GET_STR_DC_THRESHOLD
#undef GET_STR_PRECOMPUTE_THRESHOLD
#undef SET_STR_THRESHOLD
#define GET_STR_DC_THRESHOLD         22
#define GET_STR_PRECOMPUTE_THRESHOLD 42
#define SET_STR_THRESHOLD            3259

#undef MUL_FFT_TABLE
#undef MUL_FFT_MODF_THRESHOLD
#undef MUL_FFT_THRESHOLD
#define MUL_FFT_TABLE  { 400, 928, 1856, 3840, 7168, 20480, 0 }
#define MUL_FFT_MODF_THRESHOLD       416
#define MUL_FFT_THRESHOLD            MP_SIZE_T_MAX

#undef SQR_FFT_TABLE
#undef SQR_FFT_MODF_THRESHOLD
#undef SQR_FFT_THRESHOLD
#define SQR_FFT_TABLE  { 400, 992, 1984, 3840, 9216, 20480, 0 }
#define SQR_FFT_MODF_THRESHOLD       416
#define SQR_FFT_THRESHOLD            MP_SIZE_T_MAX

#endif

/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)                       \
  do {                                          \
    mp_ptr __mp_ptr_swap__tmp = (x);            \
    (x) = (y);                                  \
    (y) = __mp_ptr_swap__tmp;                   \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)              \
  do {                                          \
    MP_PTR_SWAP (xp, yp);                       \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)           \
  do {                                          \
    MP_SRCPTR_SWAP (xp, yp);                    \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)

#define MPZ_PTR_SWAP(x, y)                      \
  do {                                          \
    mpz_ptr __mpz_ptr_swap__tmp = (x);          \
    (x) = (y);                                  \
    (y) = __mpz_ptr_swap__tmp;                  \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)


void *__gmp_default_allocate _PROTO ((size_t));
void *__gmp_default_reallocate _PROTO ((void *, size_t, size_t));
void __gmp_default_free _PROTO ((void *, size_t));

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)  \
  do {                                                                 \
    if ((oldsize) != (newsize))                                        \
      (ptr) = (type *) (*__gmp_reallocate_func)                        \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));   \
  } while (0)


/* Dummy for non-gcc, code involving it will go dead. */
#ifndef __GNUC__
#define __builtin_constant_p(x)   0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96)
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif


void __gmpz_aorsmul_1 _PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub))) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
void    mpz_n_pow_ui _PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));


#define mpn_add_nc __MPN(add_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_addsub_n __MPN(addsub_n)
__GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_addsub_nc __MPN(addsub_nc)
__GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));

#define mpn_fib2_ui __MPN(fib2_ui)
mp_size_t mpn_fib2_ui _PROTO ((mp_ptr, mp_ptr, unsigned long));

/* Remap names of internal mpn functions. */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)

#define mpn_gcd_finda __MPN(gcd_finda)
mp_limb_t mpn_gcd_finda _PROTO((const mp_limb_t cp[2])) __GMP_ATTRIBUTE_PURE;

#define mpn_jacobi_base __MPN(jacobi_base)
int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)) ATTRIBUTE_CONST;

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_mul_2 __MPN(mul_2)
mp_limb_t mpn_mul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_sqr_n __MPN(sqr_n)
__GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_sub_nc __MPN(sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));


typedef __gmp_randstate_struct *gmp_randstate_ptr;

#define _gmp_rand __gmp_rand
__GMP_DECLSPEC void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                   \
  ((__gmp_rands_initialized ? 0                 \
    : (__gmp_rands_initialized = 1,             \
       gmp_randinit_default (__gmp_rands), 0)), \
   __gmp_rands)
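
/* An illustrative sketch, not from the original header: code wanting the
   shared state just writes RANDS where one is needed, e.g.

       _gmp_rand (rp, RANDS, nbits);

   and the first such use initializes __gmp_rands on the fly.  */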

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)


/* kara uses n+1 limbs of temporary space and then recurses with the
   balance, so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ...  */
#define MPN_KARA_MUL_N_TSIZE(n)   (2*((n)+BITS_PER_MP_LIMB))
#define MPN_KARA_SQR_N_TSIZE(n)   (2*((n)+BITS_PER_MP_LIMB))

/* toom3 uses 4*(ceil(n/3)) of temporary space and then recurses with the
   balance either into itself or kara.  The following might be
   overestimates.  */
#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 3*BITS_PER_MP_LIMB)
#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 3*BITS_PER_MP_LIMB)

/* need 2 so that n2>=1 */
#define MPN_KARA_MUL_N_MINSIZE    2
#define MPN_KARA_SQR_N_MINSIZE    2

/* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
#define MPN_TOOM3_MUL_N_MINSIZE   11
#define MPN_TOOM3_SQR_N_MINSIZE   11

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_kara_mul_n  __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n  __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n  __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));


#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST;

#define mpn_mul_fft  __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full  __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)) ATTRIBUTE_CONST;

#define mpn_sb_divrem_mn  __MPN(sb_divrem_mn)
mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t,
                                    mp_srcptr, mp_size_t));

#define mpn_dc_divrem_n  __MPN(dc_divrem_n)
mp_limb_t mpn_dc_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));

/* #define mpn_tdiv_q  __MPN(tdiv_q) */
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */

#define mpz_divexact_gcd  __gmpz_divexact_gcd
void mpz_divexact_gcd _PROTO ((mpz_ptr q, mpz_srcptr a, mpz_srcptr d));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
size_t mpz_inp_str_nowhite _PROTO ((mpz_ptr x, FILE *stream, int base, int c, size_t nread));
#endif

#define mpn_divisible_p __MPN(divisible_p)
int     mpn_divisible_p _PROTO ((mp_srcptr ap, mp_size_t asize,
                                 mp_srcptr dp, mp_size_t dsize)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __gmpn_rootrem
mp_size_t mpn_rootrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));


/* from gmp.h */
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    __GMPN_COPY_INCR (dst, src, size);                  \
  } while (0)
#endif

#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                              \
  do {                                                          \
    int __i;  /* Faster on some Crays with plain int */         \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = 0; __i < (n); __i++)                             \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif

#define mpn_copyi __MPN(copyi)
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    mpn_copyi (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst);                           \
        mp_srcptr __src = (src);                        \
        mp_limb_t __x;                                  \
        __x = *__src++;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst++ = __x;                         \
                __x = *__src++;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst++ = __x;                                 \
      }                                                 \
  } while (0)
#endif


/* As per __GMPN_COPY_INCR in gmp.h. */
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    if ((size) != 0)                                    \
      {                                                 \
        mp_ptr     __dst = (dst) + (size);              \
        mp_srcptr  __src = (src) + (size);              \
        mp_size_t  __size = (size);                     \
        do                                              \
          *--__dst = *--__src;                          \
        while (--__size != 0);                          \
      }                                                 \
  } while (0)
#endif

#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                              \
  do {                                                          \
    int __i;  /* Faster on some Crays with plain int */         \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = (n) - 1; __i >= 0; __i--)                        \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif

#define mpn_copyd __MPN(copyd)
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    mpn_copyd (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_DECR)
#define MPN_COPY_DECR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst) + __n;                     \
        mp_srcptr __src = (src) + __n;                  \
        mp_limb_t __x;                                  \
        __x = *__src--;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst-- = __x;                         \
                __x = *__src--;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst-- = __x;                                 \
      }                                                 \
  } while (0)
#endif


#ifndef MPN_COPY
#define MPN_COPY(d,s,n)                         \
  do {                                          \
    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
    MPN_COPY_INCR (d, s, n);                    \
  } while (0)
#endif


/* Set {dst,size} to the limbs of {src,size} in reverse order. */
#define MPN_REVERSE(dst, src, size)                     \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        *__dst = *__src;                                \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)


/* Zero n limbs at dst.

   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
   ppc630 for instance this is optimal since it can sustain only 1 store per
   cycle.

   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
   "for" loop in the generic code below can become stu/bdnz.  The do/while
   here helps it get to that.  The same caveat about plain -mpowerpc64 mode
   applies here as to __GMPN_COPY_INCR in gmp.h.

   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
   this loop too.

   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
   at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
   trouble than it's worth to do the same, though perhaps a call to memset
   would be good when on a GNU system.  */

#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_ZERO(dst, n)                \
  do {                                  \
    ASSERT ((n) >= 0);                  \
    if ((n) != 0)                       \
      {                                 \
        mp_ptr __dst = (dst) - 1;       \
        mp_size_t __n = (n);            \
        do                              \
          *++__dst = 0;                 \
        while (--__n);                  \
      }                                 \
  } while (0)
#endif

#ifndef MPN_ZERO
#define MPN_ZERO(dst, n)        \
  do {                          \
    ASSERT ((n) >= 0);          \
    if ((n) != 0)               \
      {                         \
        mp_ptr __dst = (dst);   \
        mp_size_t __n = (n);    \
        do                      \
          *__dst++ = 0;         \
        while (--__n);          \
      }                         \
  } while (0)
#endif


/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
   start up and would need to strip a lot of zeros before it'd be faster
   than a simple cmpl loop.  Here are some times in cycles for
   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
   low zeros).

                std   cld
           P5    18    16
           P6    46    38
           K6    36    13
           K7    21    20
*/
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)              \
  do {                                          \
    while (NLIMBS > 0)                          \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
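
/* For instance a 4 limb operand {1,2,0,0} (least significant limb first)
   with NLIMBS==4 is normalized down to NLIMBS==2, leaving the most
   significant limb (DST)[NLIMBS-1] non-zero.  (Illustration, not from the
   original header.)  */
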
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    ASSERT ((NLIMBS) >= 1);                     \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif

/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
   somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT ((low) == (ptr)[0]);                         \
                                                        \
    while ((low) == 0)                                  \
      {                                                 \
        (size)--;                                       \
        ASSERT ((size) >= 1);                           \
        (ptr)++;                                        \
        (low) = *(ptr);                                 \
      }                                                 \
  } while (0)

/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                                         \
  do {                                                                  \
    mpz_ptr __x = (X);                                                  \
    ASSERT ((NLIMBS) >= 1);                                             \
    __x->_mp_alloc = (NLIMBS);                                          \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
  } while (0)
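
/* An illustrative sketch, not from the original header: a temporary mpz_t
   in a function using the TMP scheme, with a hypothetical limb count.
   Since the space comes from TMP_ALLOC, a TMP_MARK must already have been
   done, and no mpz_clear is wanted.

       mpz_t  t;
       TMP_DECL (marker);

       TMP_MARK (marker);
       MPZ_TMP_INIT (t, 2*n + 1);
       ... use t, but don't mpz_clear it ...
       TMP_FREE (marker);
*/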

/* Realloc for an mpz_t Z if it has less than N limbs.  */
#define MPZ_REALLOC(z,n) ((n) > ALLOC(z) ? _mpz_realloc(z,n) : PTR(z))

#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)


/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   f1p.

   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.

   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
   without good floating point.  There's +2 for rounding up, and a further
   +2 since at the last step x limbs are doubled into a 2x+1 limb region
   whereas the actual F[2k] value might be only 2x-1 limbs.

   Note that a division is done first, since on a 32-bit system it's at
   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
   whatever a multiply of two 190Mbyte numbers takes.)

   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
   worked into the multiplier.  */

#define MPN_FIB2_SIZE(n) \
  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
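
/* As a worked check (not in the original header): with GMP_NUMB_BITS==32
   and n=1000 this gives 1000/32*23/32 + 4 = 26 limbs, i.e. 832 bits,
   comfortably above the 1000*0.6942419 = 695 or so bits F[1000] needs.  */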


/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
   -1 <= n <= FIB_TABLE_LIMIT.

   FIB_TABLE_LUCNUM_LIMIT is the largest n for which L[n] = F[n] + 2*F[n-1]
   fits in a limb.

   This data generated by code at the end of mpn/generic/fib2_ui.c.  */

extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])

#if GMP_NUMB_BITS >= 64
#define FIB_TABLE_LIMIT         93
#define FIB_TABLE_LUCNUM_LIMIT  92
#else
#if GMP_NUMB_BITS >= 32
#define FIB_TABLE_LIMIT         47
#define FIB_TABLE_LUCNUM_LIMIT  46
#else
#if GMP_NUMB_BITS >= 16
#define FIB_TABLE_LIMIT         24
#define FIB_TABLE_LUCNUM_LIMIT  23
#else
#if GMP_NUMB_BITS >= 8
#define FIB_TABLE_LIMIT         13
#define FIB_TABLE_LUCNUM_LIMIT  11
#else
#if GMP_NUMB_BITS >= 4
#define FIB_TABLE_LIMIT         7
#define FIB_TABLE_LUCNUM_LIMIT  5
#endif /* 4 */
#endif /* 8 */
#endif /* 16 */
#endif /* 32 */
#endif /* 64 */


/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
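
/* An illustrative sketch (not from the original header) of how such a
   threshold is typically consulted when selecting an algorithm:

       if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))
         ... use the basecase method ...
       else
         ... use karatsuba ...
*/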
---|
| 1152 | |
---|
| 1153 | |
---|
| 1154 | /* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a |
---|
[15293] | 1155 | value which is good on most machines. */ |
---|
[18190] | 1156 | #ifndef MUL_KARATSUBA_THRESHOLD |
---|
| 1157 | #define MUL_KARATSUBA_THRESHOLD 32 |
---|
[15293] | 1158 | #endif |
---|
| 1159 | |
---|
[18190] | 1160 | /* If MUL_TOOM3_THRESHOLD is not already defined, define it to a |
---|
[15293] | 1161 | value which is good on most machines. */ |
---|
[18190] | 1162 | #ifndef MUL_TOOM3_THRESHOLD |
---|
| 1163 | #define MUL_TOOM3_THRESHOLD 256 |
---|
[15293] | 1164 | #endif |
---|
| 1165 | |
---|
[18190] | 1166 | /* This is the threshold at which mpn_sqr_basecase should take over from |
---|
| 1167 | mpn_mul_basecase in mpn_sqr_n. Default is to use mpn_sqr_basecase |
---|
| 1168 | always. |
---|
| 1169 | |
---|
| 1170 | If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase |
---|
| 1171 | before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the |
---|
| 1172 | karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0. This oddity arises |
---|
| 1173 | more or less because SQR_KARATSUBA_THRESHOLD represents the size up to |
---|
| 1174 | which mpn_sqr_basecase should be used, and that may be never. */ |
---|
| 1175 | |
---|
| 1176 | #ifndef SQR_BASECASE_THRESHOLD |
---|
| 1177 | #define SQR_BASECASE_THRESHOLD 0 |
---|
[15293] | 1178 | #endif |
---|
| 1179 | |
---|
[18190] | 1180 | #ifndef SQR_KARATSUBA_THRESHOLD |
---|
| 1181 | #define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD) |
---|
[15293] | 1182 | #endif |
---|
| 1183 | |
---|
[18190] | 1184 | #ifndef SQR_TOOM3_THRESHOLD |
---|
| 1185 | #define SQR_TOOM3_THRESHOLD (2*MUL_TOOM3_THRESHOLD) |
---|
| 1186 | #endif |
---|
| 1187 | |
---|
[15293] | 1188 | /* First k to use for an FFT modF multiply. A modF FFT is an order |
---|
| 1189 | log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba, |
---|
| 1190 | whereas k=4 is 1.33 which is faster than toom3 at 1.485. */ |
---|
| 1191 | #define FFT_FIRST_K 4 |
---|
| 1192 | |
---|
/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
#endif
#ifndef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
#endif
#ifndef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
#endif

/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k().  */
#ifndef MUL_FFT_TABLE
#define MUL_FFT_TABLE                           \
  { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef SQR_FFT_TABLE
#define SQR_FFT_TABLE                           \
  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16


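/* How the tables above are meant to be read (editor's sketch; the real
   scan lives in mpn_fft_best_k, whose exact form isn't shown here): the
   i'th entry is the size threshold for k = FFT_FIRST_K + 1 + i, and a 0
   entry terminates.  */
#if 0
static int
fft_best_k_sketch (mp_size_t n)         /* hypothetical helper */
{
  static const mp_size_t table[] = MUL_FFT_TABLE;
  int k = FFT_FIRST_K;
  int i;
  for (i = 0; table[i] != 0 && n >= table[i]; i++)
    k++;
  return k;
}
#endif
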
/* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
   div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
   i.e. n/2 >= MUL_KARATSUBA_THRESHOLD.

   Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
   for 3 as an average.  */

#ifndef DIV_DC_THRESHOLD
#define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
#endif


/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap.  */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
  (   (char *) (xp) + (xsize) > (char *) (yp)   \
   && (char *) (yp) + (ysize) > (char *) (xp))

/* Return non-zero if xp,xsize and yp,ysize are either identical or not
   overlapping.  Return zero if they're partially overlapping.  */
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)            \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)   \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))

/* Return non-zero if dst,dsize and src,ssize are either identical or
   overlapping in a way suitable for an incrementing/decrementing algorithm.
   Return zero if they're partially overlapping in an unsuitable fashion.  */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size)      \
  MPN_SAME_OR_INCR2_P(dst, size, src, size)
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size)      \
  MPN_SAME_OR_DECR2_P(dst, size, src, size)

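/* Worked example (editor's note): with limb addresses xp=100 and yp=103
   (in limb units) and xsize=ysize=4, MPN_OVERLAP_P is 104 > 103 &&
   107 > 100, i.e. true -- the regions share limb 103.  MPN_SAME_OR_INCR_P
   accepts dst <= src because a low-to-high loop then reads each source
   limb before overwriting it; MPN_SAME_OR_DECR_P is the mirror case.  */
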

/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R).  */

#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

void __gmp_assert_header _PROTO ((const char *filename, int linenum));
__GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#define ASSERT_ALWAYS(expr)     \
  do {                          \
    if (!(expr))                \
      ASSERT_FAIL (expr);       \
  } while (0)

#if WANT_ASSERT
#define ASSERT(expr)   ASSERT_ALWAYS (expr)
#else
#define ASSERT(expr)   do {} while (0)
#endif


/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
   that it's zero.  In both cases if assertion checking is disabled the
   expression is still evaluated.  These macros are meant for use with
   routines like mpn_add_n() where the return value represents a carry or
   whatever that should or shouldn't occur in some context.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#if WANT_ASSERT
#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT_CARRY(expr)     (expr)
#define ASSERT_NOCARRY(expr)   (expr)
#endif


/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
   same as writing "#if WANT_ASSERT", but more compact.  */
#if WANT_ASSERT
#define ASSERT_CODE(expr)  expr
#else
#define ASSERT_CODE(expr)
#endif

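/* A typical use of ASSERT_CODE (editor's sketch, not from the sources):
   keep a piece of state that only the assertions consult, so it compiles
   away entirely in a normal build.

       ASSERT_CODE (mp_size_t  __size_orig = size;)
       ... code that may reduce size ...
       ASSERT (size <= __size_orig);
*/
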

/* Test that an mpq_t is in fully canonical form.  This can be used as
   protection on routines like mpq_equal which give wrong results on
   non-canonical inputs.  */
#if WANT_ASSERT
#define ASSERT_MPQ_CANONICAL(q)                         \
  do {                                                  \
    ASSERT (q->_mp_den._mp_size > 0);                   \
    if (q->_mp_num._mp_size == 0)                       \
      {                                                 \
        /* zero should be 0/1 */                        \
        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
      }                                                 \
    else                                                \
      {                                                 \
        /* no common factors */                         \
        mpz_t  __g;                                     \
        mpz_init (__g);                                 \
        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
        ASSERT (mpz_cmp_ui (__g, 1L) == 0);             \
        mpz_clear (__g);                                \
      }                                                 \
  } while (0)
#else
#define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
#endif

/* Check that the nail parts are zero.  */
#define ASSERT_ALWAYS_LIMB(limb)                \
  do {                                          \
    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
    ASSERT_ALWAYS (__nail == 0);                \
  } while (0)
#define ASSERT_ALWAYS_MPN(ptr, size)            \
  do {                                          \
    /* let whole loop go dead when no nails */  \
    if (GMP_NAIL_BITS != 0)                     \
      {                                         \
        mp_size_t  __i;                         \
        for (__i = 0; __i < (size); __i++)      \
          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
      }                                         \
  } while (0)
#if WANT_ASSERT
#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
#else
#define ASSERT_LIMB(limb)       do {} while (0)
#define ASSERT_MPN(ptr, size)   do {} while (0)
#endif


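/* Background on nails (editor's note): each limb is split as
   BITS_PER_MP_LIMB = GMP_NAIL_BITS + GMP_NUMB_BITS, with the value kept in
   the low GMP_NUMB_BITS and the high nail bits required to be zero between
   operations.  For example, a build with 32-bit limbs and GMP_NAIL_BITS=2
   has GMP_NUMB_BITS=30, GMP_NUMB_MASK=0x3FFFFFFF and
   GMP_NAIL_MASK=0xC0000000.  */
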
/* Assert that an mpn region {ptr,size} is zero, or non-zero.
   size==0 is allowed, and in that case {ptr,size} is considered to be
   zero.  */
#if WANT_ASSERT
#define ASSERT_MPN_ZERO_P(ptr,size)     \
  do {                                  \
    mp_size_t  __i;                     \
    ASSERT ((size) >= 0);               \
    for (__i = 0; __i < (size); __i++)  \
      ASSERT ((ptr)[__i] == 0);         \
  } while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)  \
  do {                                  \
    mp_size_t  __i;                     \
    int        __nonzero = 0;           \
    ASSERT ((size) >= 0);               \
    for (__i = 0; __i < (size); __i++)  \
      if ((ptr)[__i] != 0)              \
        {                               \
          __nonzero = 1;                \
          break;                        \
        }                               \
    ASSERT (__nonzero);                 \
  } while (0)
#else
#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
#endif


#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                                \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s = (s);                               \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
    do                                                  \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
    while (--__n);                                      \
  } while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
    do                                                  \
      operation;                                        \
    while (--__n);                                      \
  } while (0)

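/* The "operation" parameter is a whole statement advancing the three local
   pointers, so each macro below just supplies its limb-by-limb expression.
   For example (editor's note), mpn_and_n(d,s1,s2,n) in the non-native case
   expands to the loop above with the body

       *__d++ = *__s1++ & *__s2++;

   executed n times.  */
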
#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
/* mask the complemented results so any nail bits stay zero */
#define mpn_nand_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)
#endif


/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
   subtract.  */
#if GMP_NAIL_BITS == 0
#define SUBC_LIMB(cout, w, x, y)        \
  do {                                  \
    mp_limb_t  __x = (x);               \
    mp_limb_t  __y = (y);               \
    mp_limb_t  __w = __x - __y;         \
    (w) = __w;                          \
    (cout) = __w > __x;                 \
  } while (0)
#else
#define SUBC_LIMB(cout, w, x, y)                \
  do {                                          \
    mp_limb_t  __w = (x) - (y);                 \
    (w) = __w & GMP_NUMB_MASK;                  \
    (cout) = __w >> (GMP_LIMB_BITS-1);          \
  } while (0)
#endif


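/* Why the nail-free case detects borrow (editor's note): limb arithmetic
   wraps mod 2^BITS_PER_MP_LIMB, so when x < y the stored difference is
   x - y + 2^BITS_PER_MP_LIMB, which necessarily exceeds x; hence
   "(cout) = __w > __x".  A minimal check (editor's sketch, not part of
   the library):  */
#if 0
static void
subc_limb_demo (void)
{
  mp_limb_t  w, c;
  SUBC_LIMB (c, w, 1, 2);       /* 1 - 2 borrows */
  ASSERT_ALWAYS (c == 1);
  ASSERT_ALWAYS (w == GMP_NUMB_MAX);
  SUBC_LIMB (c, w, 2, 1);       /* 2 - 1 doesn't */
  ASSERT_ALWAYS (c == 0 && w == 1);
}
#endif
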
/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
   expecting no carry (or borrow) from that.

   The size parameter is only for the benefit of assertion checking.  In a
   normal build it's unused and the carry/borrow is just propagated as far
   as it needs to go.

   On random data, usually only one or two limbs of {ptr,size} get updated,
   so there's no need for any sophisticated looping, just something compact
   and sensible.

   FIXME: Do the generic MPN_{INCR,DECR}_U with a block of code like
   mpn_incr_u but with the assertions built in, rather than the separate
   add_1 and sub_1 when assertion checking.

   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
   declaring their operand sizes, then remove the former.  This is purely
   for the benefit of assertion checking.  */

#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
  && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
/* Better flags handling than the generic C gives on i386, saving a few
   bytes of code and maybe a cycle or two.  */

#define MPN_IORD_U(ptr, incr, aors)                     \
  do {                                                  \
    mp_ptr  __ptr_dummy;                                \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        __asm__ __volatile__                            \
          ("\n" ASM_L(top) ":\n"                        \
           "\t" aors " $1, (%0)\n"                      \
           "\tleal 4(%0),%0\n"                          \
           "\tjc " ASM_L(top)                           \
           : "=r" (__ptr_dummy)                         \
           : "0"  (ptr)                                 \
           : "memory");                                 \
      }                                                 \
    else                                                \
      {                                                 \
        __asm__ __volatile__                            \
          (   aors  " %2,(%0)\n"                        \
           "\tjnc " ASM_L(done) "\n"                    \
           ASM_L(top) ":\n"                             \
           "\t" aors " $1,4(%0)\n"                      \
           "\tleal 4(%0),%0\n"                          \
           "\tjc " ASM_L(top) "\n"                      \
           ASM_L(done) ":\n"                            \
           : "=r" (__ptr_dummy)                         \
           : "0"  (ptr),                                \
             "ri" (incr)                                \
           : "memory");                                 \
      }                                                 \
  } while (0)

#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
#endif

#if GMP_NAIL_BITS == 0
#ifndef mpn_incr_u
#define mpn_incr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        while (++(*(__p++)) == 0)                       \
          ;                                             \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p + (incr);                            \
        *__p = __x;                                     \
        if (__x < (incr))                               \
          while (++(*(++__p)) == 0)                     \
            ;                                           \
      }                                                 \
  } while (0)
#endif
#ifndef mpn_decr_u
#define mpn_decr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        while ((*(__p++))-- == 0)                       \
          ;                                             \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p;                                     \
        *__p = __x - (incr);                            \
        if (__x < (incr))                               \
          while ((*(++__p))-- == 0)                     \
            ;                                           \
      }                                                 \
  } while (0)
#endif
#endif

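/* Carry propagation example (editor's note): with 32-bit limbs,
   incrementing {0xFFFFFFFF, 0xFFFFFFFF, 5} by 1 wraps the first two limbs
   to zero and stops after bumping the third, giving {0, 0, 6}.  The
   constant-1 case above is exactly that loop: keep incrementing while the
   freshly stored limb wrapped to zero.  */
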
#if GMP_NAIL_BITS >= 1
#ifndef mpn_incr_u
#define mpn_incr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        do                                              \
          {                                             \
            __x = (*__p + 1) & GMP_NUMB_MASK;           \
            *__p++ = __x;                               \
          }                                             \
        while (__x == 0);                               \
      }                                                 \
    else                                                \
      {                                                 \
        __x = (*__p + (incr));                          \
        *__p++ = __x & GMP_NUMB_MASK;                   \
        if (__x >> GMP_NUMB_BITS != 0)                  \
          {                                             \
            do                                          \
              {                                         \
                __x = (*__p + 1) & GMP_NUMB_MASK;       \
                *__p++ = __x;                           \
              }                                         \
            while (__x == 0);                           \
          }                                             \
      }                                                 \
  } while (0)
#endif
#ifndef mpn_decr_u
#define mpn_decr_u(p,incr)                              \
  do {                                                  \
    mp_limb_t __x;                                      \
    mp_ptr __p = (p);                                   \
    if (__builtin_constant_p (incr) && (incr) == 1)     \
      {                                                 \
        do                                              \
          {                                             \
            __x = *__p;                                 \
            *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
          }                                             \
        while (__x == 0);                               \
      }                                                 \
    else                                                \
      {                                                 \
        __x = *__p - (incr);                            \
        *__p++ = __x & GMP_NUMB_MASK;                   \
        if (__x >> GMP_NUMB_BITS != 0)                  \
          {                                             \
            do                                          \
              {                                         \
                __x = *__p;                             \
                *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
              }                                         \
            while (__x == 0);                           \
          }                                             \
      }                                                 \
  } while (0)
#endif
#endif

#ifndef MPN_INCR_U
#if WANT_ASSERT
#define MPN_INCR_U(ptr, size, n)                        \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
  } while (0)
#else
#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
#endif
#endif

#ifndef MPN_DECR_U
#if WANT_ASSERT
#define MPN_DECR_U(ptr, size, n)                        \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
  } while (0)
#else
#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
#endif
#endif


/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an
     mp_limb_t.  For example, for base 10 on a machine where an mp_limb_t
     has 32 bits this is 9, since 10**9 is the largest power of ten that
     fits into an mp_limb_t.  */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built
     by factors of base.  Exception: For 2, 4, 8, etc, big_base is
     log2(base), i.e. the number of bits used to represent each digit in
     the base.  */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application
     can choose to multiply by big_base_inverted.  */
  mp_limb_t big_base_inverted;
};

#define   mp_bases __MPN(bases)
#define __mp_bases __MPN(bases)
__GMP_DECLSPEC extern const struct bases mp_bases[257];

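/* Worked values (editor's note, matching the base-10 defines below): with
   32-bit limbs, chars_per_limb is 9 and big_base is 10^9 = 0x3B9ACA00,
   since 10^10 would overflow; chars_per_bit_exactly is
   log(2)/log(10) ~= 0.30103.  For base 16, big_base is instead 4, the
   number of bits per hex digit.  */
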
/* mp_bases[10] values, generated by mpn/mp_bases.c */
#if GMP_NUMB_BITS == 4
#define MP_BASES_CHARS_PER_LIMB_10      1
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0xa)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x9)
#define MP_BASES_NORMALIZATION_STEPS_10 0
#endif
#if GMP_NUMB_BITS == 8
#define MP_BASES_CHARS_PER_LIMB_10      2
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x64)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x47)
#define MP_BASES_NORMALIZATION_STEPS_10 1
#endif
#if GMP_NUMB_BITS == 16
#define MP_BASES_CHARS_PER_LIMB_10      4
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x2710)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0xa36e)
#define MP_BASES_NORMALIZATION_STEPS_10 2
#endif
#if GMP_NUMB_BITS == 28
#define MP_BASES_CHARS_PER_LIMB_10      8
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x5f5e100)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x5798ee23)
#define MP_BASES_NORMALIZATION_STEPS_10 5
#endif
#if GMP_NUMB_BITS == 30
#define MP_BASES_CHARS_PER_LIMB_10      9
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x3b9aca00)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x12e0be82)
#define MP_BASES_NORMALIZATION_STEPS_10 2
#endif
#if GMP_NUMB_BITS == 32
#define MP_BASES_CHARS_PER_LIMB_10      9
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x3b9aca00)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x12e0be82)
#define MP_BASES_NORMALIZATION_STEPS_10 2
#endif
#if GMP_NUMB_BITS == 60
#define MP_BASES_CHARS_PER_LIMB_10      18
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0xde0b6b3a7640000)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x2725dd1d243aba0e)
#define MP_BASES_NORMALIZATION_STEPS_10 4
#endif
#if GMP_NUMB_BITS == 62
#define MP_BASES_CHARS_PER_LIMB_10      18
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0xde0b6b3a7640000)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x2725dd1d243aba0e)
#define MP_BASES_NORMALIZATION_STEPS_10 4
#endif
#if GMP_NUMB_BITS == 64
#define MP_BASES_CHARS_PER_LIMB_10      19
#define MP_BASES_BIG_BASE_10            CNST_LIMB(0x8ac7230489e80000)
#define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0xd83c94fb6d2ac34a)
#define MP_BASES_NORMALIZATION_STEPS_10 0
#endif


/* For power of 2 bases this is exact.  For other bases the result is
   either exact or one too big.

   To be exact always it'd be necessary to examine all the limbs of the
   operand, since numbers like 100..000 and 99...999 generally differ only
   in the lowest limb.  It'd be possible to examine just a couple of high
   limbs to increase the probability of being exact, but that doesn't seem
   worth bothering with.  */

#define MPN_SIZEINBASE(result, ptr, size, base)                         \
  do {                                                                  \
    int       __lb_base, __cnt;                                         \
    mp_size_t __totbits;                                                \
                                                                        \
    ASSERT ((size) >= 0);                                               \
    ASSERT ((base) >= 2);                                               \
    ASSERT ((base) < numberof (mp_bases));                              \
                                                                        \
    /* Special case for X == 0.  */                                     \
    if ((size) == 0)                                                    \
      (result) = 1;                                                     \
    else                                                                \
      {                                                                 \
        /* Calculate the total number of significant bits of X.  */     \
        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
        __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);   \
                                                                        \
        if (POW2_P (base))                                              \
          {                                                             \
            __lb_base = mp_bases[base].big_base;                        \
            (result) = (__totbits + __lb_base - 1) / __lb_base;         \
          }                                                             \
        else                                                            \
          (result) = (size_t)                                           \
            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
      }                                                                 \
  } while (0)

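/* Example of the "exact or one too big" behaviour (editor's note): a
   one-limb operand with value 1000 has __totbits = 10, so base 10 gives
   (size_t) (10 * 0.30103) + 1 = 4 digits, which is exact; value 512 also
   has __totbits = 10 and also gives 4, one more than its 3 actual
   digits.  */
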
/* Eliminate mp_bases lookups for base==16.  */
#define MPN_SIZEINBASE_16(result, ptr, size)                            \
  do {                                                                  \
    int       __cnt;                                                    \
    mp_size_t __totbits;                                                \
                                                                        \
    ASSERT ((size) >= 0);                                               \
                                                                        \
    /* Special case for X == 0.  */                                     \
    if ((size) == 0)                                                    \
      (result) = 1;                                                     \
    else                                                                \
      {                                                                 \
        /* Calculate the total number of significant bits of X.  */     \
        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
        __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);   \
        (result) = (__totbits + 4 - 1) / 4;                             \
      }                                                                 \
  } while (0)


#if HAVE_HOST_CPU_FAMILY_x86
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif

/* Use a library function for invert_limb, if available.  */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
#define invert_limb(invxl,xl)  (invxl = mpn_invert_limb (xl))
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl)                   \
  do {                                          \
    mp_limb_t dummy;                            \
    ASSERT ((xl) != 0);                         \
    if ((xl) << 1 == 0)                         \
      (invxl) = ~ (mp_limb_t) 0;                \
    else                                        \
      udiv_qrnnd (invxl, dummy, -(xl), 0, xl);  \
  } while (0)
#endif

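/* What invert_limb computes (editor's note): with B = 2^BITS_PER_MP_LIMB
   and d normalized (high bit set), the udiv_qrnnd call divides the
   two-limb value (B-d)*B by d, giving invxl = floor(B*B/d) - B, the
   reciprocal used by udiv_qrnnd_preinv below.  For d = B/2 exactly,
   floor(B*B/d) - B = B doesn't fit a limb, hence the explicit all-ones
   (ie. B-1) in that case; "(xl) << 1 == 0" detects it, since only the top
   bit alone makes the shift wrap to zero.  */
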
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                          \
  do {                                                                  \
    mp_limb_t _q, _ql, _r;                                              \
    mp_limb_t _xh, _xl;                                                 \
    ASSERT ((d) != 0);                                                  \
    umul_ppmm (_q, _ql, (nh), (di));                                    \
    _q += (nh);        /* DI is 2**BITS_PER_MP_LIMB too small */        \
    umul_ppmm (_xh, _xl, _q, (d));                                      \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
    if (_xh != 0)                                                       \
      {                                                                 \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
        _q += 1;                                                        \
        if (_xh != 0)                                                   \
          {                                                             \
            _r -= (d);                                                  \
            _q += 1;                                                    \
          }                                                             \
      }                                                                 \
    if (_r >= (d))                                                      \
      {                                                                 \
        _r -= (d);                                                      \
        _q += 1;                                                        \
      }                                                                 \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup)         \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup)))                         \
      + ((nl) >> 1 >> ((lgup) - 1));                                    \
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (dnorm));                                     \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)

/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version to use.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di)                     \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = (nh);                                                         \
    _n10 = (nl);                                                        \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (d));                                         \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)

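/* The double shift ">> 1 >> ((lgup) - 1)" above (editor's note) computes
   (nl) >> lgup without ever shifting by a full BITS_PER_MP_LIMB, which C
   leaves undefined when lgup == BITS_PER_MP_LIMB.  Splitting it as >>1
   then >>(lgup-1) keeps both shift counts in range for every valid
   lgup >= 1, and still yields (nl) >> lgup.  */
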

#define mpn_preinv_divrem_1  __MPN(preinv_divrem_1)
mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));


/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
   the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
   mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
   the few CISC chips where preinv is not good have defines saying so.  */
#ifndef USE_PREINV_DIVREM_1
#define USE_PREINV_DIVREM_1   1
#endif
#ifndef USE_PREINV_MOD_1
#define USE_PREINV_MOD_1   1
#endif

#if USE_PREINV_DIVREM_1
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
#else
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
  mpn_divrem_1 (qp, xsize, ap, size, d)
#endif

#if USE_PREINV_MOD_1
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
  mpn_preinv_mod_1 (src, size, divisor, inverse)
#else
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
  mpn_mod_1 (src, size, divisor)
#endif


#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
mp_limb_t mpn_mod_34lsub1 _PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;


/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
   plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
   mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
   modexact are faster at all sizes, so the defaults are 0.  Those CPUs
   where this is not right have a tuned threshold.  */
#ifndef DIVEXACT_1_THRESHOLD
#define DIVEXACT_1_THRESHOLD  0
#endif
#ifndef MODEXACT_1_ODD_THRESHOLD
#define MODEXACT_1_ODD_THRESHOLD  0
#endif

#define mpn_divexact_1 __MPN(divexact_1)
void    mpn_divexact_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
  do {                                                                        \
    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
    else                                                                      \
      {                                                                       \
        ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
        mpn_divexact_1 (dst, src, size, divisor);                             \
      }                                                                       \
  } while (0)

#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
mp_limb_t mpn_modexact_1c_odd _PROTO ((mp_srcptr src, mp_size_t size,
                                       mp_limb_t divisor, mp_limb_t c)) __GMP_ATTRIBUTE_PURE;

#if HAVE_NATIVE_mpn_modexact_1_odd
#define mpn_modexact_1_odd  __MPN(modexact_1_odd)
mp_limb_t mpn_modexact_1_odd _PROTO ((mp_srcptr src, mp_size_t size,
                                      mp_limb_t divisor)) __GMP_ATTRIBUTE_PURE;
#else
#define mpn_modexact_1_odd(src,size,divisor) \
  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
#endif

#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)     \
  (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)     \
   ? mpn_modexact_1_odd (src, size, divisor)            \
   : mpn_mod_1 (src, size, divisor))


/* modlimb_invert() sets inv to the multiplicative inverse of n modulo
   2^BITS_PER_MP_LIMB, ie. satisfying inv*n == 1 mod 2^BITS_PER_MP_LIMB.
   n must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean "An algorithm
   for exact division" end of section 4 (reference in gmp.texi).

   Possible enhancement: Could use UHWtype until the last step, if
   half-size multiplies are faster (might help under _LONG_LONG_LIMB).

   Alternative: As noted in Granlund and Montgomery "Division by Invariant
   Integers using Multiplication" (reference in gmp.texi), n itself gives a
   3-bit inverse immediately, and could be used instead of a table lookup.
   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2
   into bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
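
/* A worked run of the iteration below (editor's note): each
   __inv = 2*__inv - __inv*__inv*__n is inv * (2 - inv*n), a Newton step
   that doubles the number of correct low bits.  For n = 3 the table gives
   0xAB (3 * 0xAB = 0x201 == 1 mod 2^8); one step gives 0xAAAB
   (3 * 0xAAAB = 0x20001 == 1 mod 2^16); the next gives 0xAAAAAAAB,
   matching MODLIMB_INVERSE_3 below for 32-bit limbs.  */
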
#define modlimb_invert_table  __gmp_modlimb_invert_table
__GMP_DECLSPEC extern const unsigned char  modlimb_invert_table[128];

#if BITS_PER_MP_LIMB <= 8
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB <= 16
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 32 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 32 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 64 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#endif /* 64 */
#endif /* 32 */
#endif /* 16 */
#endif /* 8 */


/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
   0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.  */
#define MODLIMB_INVERSE_3   ((GMP_NUMB_MAX / 3) * 2 + 1)


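/* Checking that formula (editor's note): for the even GMP_NUMB_BITS values
   used here, 3 divides GMP_NUMB_MAX = 2^GMP_NUMB_BITS - 1 exactly, say
   GMP_NUMB_MAX = 3*m.  Then 3*(2*m + 1) = 2*(2^GMP_NUMB_BITS - 1) + 3
   = 2^(GMP_NUMB_BITS+1) + 1, which is 1 modulo 2^GMP_NUMB_BITS.  */
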
/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.

   It's not clear whether this is the best way to do this calculation.
   Anything congruent to -a would be fine for the one limb congruence
   tests.  */

#define NEG_MOD(r, a, d)                                        \
  do {                                                          \
    ASSERT ((d) != 0);                                          \
    ASSERT_LIMB (a);                                            \
    ASSERT_LIMB (d);                                            \
                                                                \
    if ((a) <= (d))                                             \
      {                                                         \
        /* small a is reasonably likely */                      \
        (r) = (d) - (a);                                        \
      }                                                         \
    else                                                        \
      {                                                         \
        unsigned   __twos;                                      \
        mp_limb_t  __dnorm;                                     \
        count_leading_zeros (__twos, d);                        \
        __twos -= GMP_NAIL_BITS;                                \
        __dnorm = (d) << __twos;                                \
        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);     \
      }                                                         \
                                                                \
    ASSERT_LIMB (r);                                            \
  } while (0)

/* A bit mask of all the least significant zero bits of n, or -1 if n==0.  */
#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)


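/* LOW_ZEROS_MASK example (editor's note): n & -n isolates the lowest set
   bit, so for n = 0b101000, n & -n = 0b1000 and the mask is 0b0111; for
   n = 0 the subtraction gives all ones, ie. -1.  */
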
/* Set "p" to 1 if there's an odd number of 1 bits in "n", or to 0 if
   there's an even number.  */

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
#define ULONG_PARITY(p, n)              \
  do {                                  \
    char           __p;                 \
    unsigned long  __n = (n);           \
    __n ^= (__n >> 16);                 \
    asm ("xorb %h1, %b1\n"              \
         "setpo %0\n"                   \
         : "=qm" (__p), "=q" (__n)      \
         : "1" (__n));                  \
    (p) = __p;                          \
  } while (0)
#else
#define ULONG_PARITY(p, n)                      \
  do {                                          \
    unsigned long  __n = (n);                   \
    int  __p = 0;                               \
    do                                          \
      {                                         \
        __p ^= 0x96696996L >> (__n & 0x1F);     \
        __n >>= 5;                              \
      }                                         \
    while (__n != 0);                           \
                                                \
    (p) = __p & 1;                              \
  } while (0)
#endif


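/* How the generic version works (editor's note): bit i of the constant
   0x96696996 is the parity of i, for i = 0..31, so each 5-bit chunk of n
   indexes a 1-bit parity table and the chunk parities XOR together in the
   low bit of __p.  E.g. the low byte 0x96 = 10010110 binary gives
   parities 0,1,1,0 for i = 0,1,2,3, and so on up the word.  */
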
/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
   kernel with xchgb instead of rorw), but this is not done here, because
   i386 means generic x86 and mixing word and dword operations will cause
   partial register stalls on P6 chips.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
  && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)                            \
  do {                                                  \
    asm ("bswap %0" : "=r" (dst) : "0" (src));          \
  } while (0)
#endif /* x86 */

#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && defined (__ia64) && BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src)                                    \
  do {                                                          \
    asm ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src));        \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB)
#if BITS_PER_MP_LIMB == 8
#define BSWAP_LIMB(dst, src)            \
  do { (dst) = (src); } while (0)
#endif
#if BITS_PER_MP_LIMB == 16
#define BSWAP_LIMB(dst, src)                    \
  do {                                          \
    (dst) = ((src) << 8) + ((src) >> 8);        \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    (dst) =                             \
      ((src) << 24)                     \
      + (((src) & 0xFF00) << 8)         \
      + (((src) >> 8) & 0xFF00)         \
      + ((src) >> 24);                  \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src)            \
  do {                                  \
    (dst) =                             \
      ((src) << 56)                     \
      + (((src) & 0xFF00) << 40)        \
      + (((src) & 0xFF0000) << 24)      \
      + (((src) & 0xFF000000) << 8)     \
      + (((src) >> 8) & 0xFF000000)     \
      + (((src) >> 24) & 0xFF0000)      \
      + (((src) >> 40) & 0xFF00)        \
      + ((src) >> 56);                  \
  } while (0)
#endif
#endif


/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
   those we know are fast.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN     \
  && (HAVE_HOST_CPU_powerpc604                          \
      || HAVE_HOST_CPU_powerpc604e                      \
      || HAVE_HOST_CPU_powerpc750                       \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src)     \
  do {                                  \
    mp_srcptr  __blf_src = (src);       \
    mp_limb_t  __limb;                  \
    __asm__ ("lwbrx %0, 0, %1"          \
             : "=r" (__limb)            \
             : "r" (__blf_src),         \
               "m" (*__blf_src));       \
    (limb) = __limb;                    \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_FETCH)
#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
#endif


/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
   know are fast.  FIXME: Is this necessary?  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN     \
  && (HAVE_HOST_CPU_powerpc604                          \
      || HAVE_HOST_CPU_powerpc604e                      \
      || HAVE_HOST_CPU_powerpc750                       \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb)     \
  do {                                  \
    mp_ptr     __dst = (dst);           \
    mp_limb_t  __limb = (limb);         \
    __asm__ ("stwbrx %1, 0, %2"         \
             : "=m" (*__dst)            \
             : "r" (__limb),            \
               "r" (__dst));            \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_STORE)
#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
#endif


/* Byte swap limbs from {src,size} and store at {dst,size}.  */
#define MPN_BSWAP(dst, src, size)                       \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_srcptr  __src = (src);                           \
    mp_size_t  __size = (size);                         \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        BSWAP_LIMB_FETCH (*__dst, __src);               \
        __dst++;                                        \
        __src++;                                        \
      }                                                 \
  } while (0)

/* Byte swap limbs from {src,size} and store in reverse order at
   {dst,size}.  */
#define MPN_BSWAP_REVERSE(dst, src, size)               \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        BSWAP_LIMB_FETCH (*__dst, __src);               \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)


/* No processor claiming to be SPARC v9 compliant seems to
   implement the POPC instruction.  Disable pattern for now.  */
#if 0
#if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
#define popc_limb(result, input)                                \
  do {                                                          \
    asm ("popc %1,%0" : "=r" (result) : "rI" (input));          \
  } while (0)
#endif
#endif

/* Cool population count of an mp_limb_t.
   You have to figure out how this works; we won't tell you!

   The constants could also be expressed as:
     0xAA... = [2^(N+1) / 3] = [(2^N-1)/3*2]
     0x33... = [2^N / 5]     = [(2^N-1)/5]
     0x0f... = [2^N / 17]    = [(2^N-1)/17]
   (N is BITS_PER_MP_LIMB, [] denotes truncation.)  */

#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 64
#define popc_limb(result, input)                                \
  do {                                                          \
    mp_limb_t  __x = (input);                                   \
    __x -= (__x & CNST_LIMB(0xaaaaaaaaaaaaaaaa)) >> 1;          \
    __x = ((__x >> 2) & CNST_LIMB(0x3333333333333333))          \
      +    (__x       & CNST_LIMB(0x3333333333333333));         \
    __x = ((__x >> 4) + __x) & CNST_LIMB(0x0f0f0f0f0f0f0f0f);   \
    __x = ((__x >> 8) + __x);                                   \
    __x = ((__x >> 16) + __x);                                  \
    __x = ((__x >> 32) + __x) & 0xff;                           \
    (result) = __x;                                             \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 32
#define popc_limb(result, input)                                \
  do {                                                          \
    mp_limb_t  __x = (input);                                   \
    __x -= (__x & 0xaaaaaaaaL) >> 1;                            \
    __x = ((__x >> 2) & 0x33333333L) + (__x & 0x33333333L);     \
    __x = ((__x >> 4) + __x) & 0x0f0f0f0fL;                     \
    __x = ((__x >> 8) + __x);                                   \
    __x = ((__x >> 16) + __x) & 0xff;                           \
    (result) = __x;                                             \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 16
#define popc_limb(result, input)                        \
  do {                                                  \
    mp_limb_t  __x = (input);                           \
    __x -= (__x & 0xaaaa) >> 1;                         \
    __x = ((__x >> 2) & 0x3333) + (__x & 0x3333);       \
    __x = ((__x >> 4) + __x) & 0x0f0f;                  \
    __x = ((__x >> 8) + __x) & 0xff;                    \
    (result) = __x;                                     \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 8
#define popc_limb(result, input)                        \
  do {                                                  \
    mp_limb_t  __x = (input);                           \
    __x -= (__x & 0xaa) >> 1;                           \
    __x = ((__x >> 2) & 0x33) + (__x & 0x33);           \
    __x = ((__x >> 4) + __x) & 0xf;                     \
    (result) = __x;                                     \
  } while (0)
#endif
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 4
#define popc_limb(result, input)                                              \
  do {                                                                        \
    mp_limb_t  __x = (input);                                                 \
    __x = (__x & 1) + ((__x >> 1) & 1) + ((__x >> 2) & 1) + ((__x >> 3) & 1); \
    (result) = __x;                                                           \
  } while (0)
#endif


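/* Spoiler for the "cool" count (editor's note): x - ((x & 0xAA..) >> 1)
   leaves each 2-bit field holding the count of bits that were in it (a
   field with value 2*b1+b0 becomes 2*b1+b0 - b1 = b1+b0); the 0x33.. step
   adds adjacent 2-bit counts into 4-bit fields, the 0x0f.. step into
   bytes, and the final shifts sum the byte counts, masked to 0xff since
   the total can't exceed BITS_PER_MP_LIMB.  */
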
/* Define stuff for longlong.h.  */
#if HAVE_ATTRIBUTE_MODE
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if HAVE_LONG_LONG
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB

| 2459 | /* Define ieee_double_extract and _GMP_IEEE_FLOATS. */ |
---|
| 2460 | |
---|
| 2461 | #if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__))) |
---|
| 2462 | /* Special case for little endian ARM since floats remain in big-endian. */ |
---|
| 2463 | #define _GMP_IEEE_FLOATS 1 |
---|
| 2464 | union ieee_double_extract |
---|
| 2465 | { |
---|
| 2466 | struct |
---|
| 2467 | { |
---|
| 2468 | unsigned int manh:20; |
---|
| 2469 | unsigned int exp:11; |
---|
| 2470 | unsigned int sig:1; |
---|
| 2471 | unsigned int manl:32; |
---|
| 2472 | } s; |
---|
| 2473 | double d; |
---|
| 2474 | }; |
---|
| 2475 | #else |
---|
| 2476 | #if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \ |
---|
| 2477 | || defined (__alpha) \ |
---|
| 2478 | || defined (__clipper__) \ |
---|
| 2479 | || defined (__cris) \ |
---|
| 2480 | || defined (__i386__) \ |
---|
[18190] | 2481 | || defined (__x86_64__) \ |
---|
[15293] | 2482 | || defined (__i860__) \ |
---|
| 2483 | || defined (__i960__) \ |
---|
[18190] | 2484 | || defined (__ia64) \ |
---|
[15293] | 2485 | || defined (MIPSEL) || defined (_MIPSEL) \ |
---|
| 2486 | || defined (__ns32000__) \ |
---|
| 2487 | || defined (__WINNT) || defined (_WIN32) |
---|
| 2488 | #define _GMP_IEEE_FLOATS 1 |
---|
| 2489 | union ieee_double_extract |
---|
| 2490 | { |
---|
| 2491 | struct |
---|
| 2492 | { |
---|
| 2493 | unsigned int manl:32; |
---|
| 2494 | unsigned int manh:20; |
---|
| 2495 | unsigned int exp:11; |
---|
| 2496 | unsigned int sig:1; |
---|
| 2497 | } s; |
---|
| 2498 | double d; |
---|
| 2499 | }; |
---|
| 2500 | #else /* Need this as an #else since the tests aren't made exclusive. */ |
---|
#if defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__) \
 || defined (mc68020)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      /* "int" might be only 16 bits, so use "long" */
      unsigned long sig:1;
      unsigned long exp:11;
      unsigned long manh:20;
      unsigned long manl:32;
    } s;
  double d;
};
#else
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__)           \
 || defined (__a29k__) || defined (_AM29K)                      \
 || defined (__arm__)                                           \
 || (defined (__convex__) && defined (_IEEE_FLOAT_))            \
 || defined (_CRAYMPP) || defined (_CRAYIEEE)                   \
 || defined (__i370__) || defined (__mvs__)                     \
 || defined (__m88000__)                                        \
 || defined (MIPSEB) || defined (_MIPSEB)                       \
 || defined (__hppa) || defined (__hppa__)                      \
 || defined (__pyr__)                                           \
 || defined (__ibm032__)                                        \
 || defined (_IBMR2) || defined (_ARCH_PPC)                     \
 || defined (__sh__)                                            \
 || defined (__sparc) || defined (sparc) || defined (__sparc__) \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif
#endif
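
/* Illustration only (added, not part of the original header): how the union
   above picks a double apart.  A minimal sketch assuming _GMP_IEEE_FLOATS,
   64-bit IEEE doubles and <stdio.h>; for x = 1.5 the encoding is sig=0,
   exp=0x3FF, manh=0x80000, manl=0.  */
#if 0
static void
show_double_fields (double x)
{
  union ieee_double_extract u;
  u.d = x;
  printf ("sig=%u exp=0x%X manh=0x%05X manl=0x%08X\n",
          (unsigned) u.s.sig, (unsigned) u.s.exp,
          (unsigned) u.s.manh, (unsigned) u.s.manl);
}
#endif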

/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
/* Maximum number of limbs it will take to store any `double'.
   We assume doubles have 53 mantissa bits.  */
#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1)
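/* Worked example (added for illustration): with GMP_NUMB_BITS == 32,
   LIMBS_PER_DOUBLE is (53 + 31)/32 + 1 = 2 + 1 = 3 -- two limbs for the 53
   mantissa bits rounded up, plus one because the mantissa generally won't
   be aligned to a limb boundary.  With GMP_NUMB_BITS == 64 it is
   (53 + 63)/64 + 1 = 2.  */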

double __gmp_scale2 _PROTO ((double, int)) ATTRIBUTE_CONST;
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
void __gmp_exception _PROTO ((int)) ATTRIBUTE_NORETURN;
void __gmp_divide_by_zero _PROTO ((void)) ATTRIBUTE_NORETURN;
void __gmp_sqrt_of_negative _PROTO ((void)) ATTRIBUTE_NORETURN;
#define GMP_ERROR(code)   __gmp_exception (code)
#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()

#if defined _LONG_LONG_LIMB
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
#endif
#else /* not _LONG_LONG_LIMB */
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
#endif
#endif /* _LONG_LONG_LIMB */
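/* Expansion example (added for illustration): with long long limbs and an
   ANSI preprocessor, CNST_LIMB(0xDEADBEEF) expands to
   ((mp_limb_t) 0xDEADBEEFLL), giving the constant the right integer type
   without hard-coding an L or LL suffix at each use.  */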

/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
#if GMP_NUMB_BITS == 2
#define PP 0x3                                  /* 3 */
#define PP_FIRST_OMITTED 5
#endif
#if GMP_NUMB_BITS == 4
#define PP 0xF                                  /* 3 x 5 */
#define PP_FIRST_OMITTED 7
#endif
#if GMP_NUMB_BITS == 8
#define PP 0x69                                 /* 3 x 5 x 7 */
#define PP_FIRST_OMITTED 11
#endif
#if GMP_NUMB_BITS == 16
#define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
#define PP_FIRST_OMITTED 17
#endif
#if GMP_NUMB_BITS == 32
#define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_FIRST_OMITTED 31
#endif
#if GMP_NUMB_BITS == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_FIRST_OMITTED 59
#endif
#ifndef PP_FIRST_OMITTED
#define PP_FIRST_OMITTED 3
#endif
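
/* Illustration only (added, not part of the original header): the intended
   use of PP.  PP is a product of small odd primes, so a single mpn_mod_1 by
   PP gives a remainder from which n mod each of those primes follows with
   cheap one-limb arithmetic.  "passes_quadratic_residue_test" is a
   hypothetical helper, not a real GMP function.  */
#if 0
static int
cheap_square_filter (mp_srcptr np, mp_size_t nsize)
{
  mp_limb_t r = mpn_mod_1 (np, nsize, PP);  /* one division covers all primes */
  /* since 3 | PP and r == n mod PP, r % 3 == n % 3, likewise for 5, 7, ... */
  if (! passes_quadratic_residue_test (r % 3, 3))  return 0;
  if (! passes_quadratic_residue_test (r % 5, 5))  return 0;
  if (! passes_quadratic_residue_test (r % 7, 7))  return 0;
  /* ... up to the last prime multiplied into PP ... */
  return 1;  /* survived the filters; a full mpn_sqrtrem check still follows */
}
#endif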


/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
   used to ensure the expressions are "int"s even if a and/or b might be
   other types.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs.  */

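/* Worked example (added for illustration): under this convention the int
   value 2 (binary 10) has bit 1 set and so denotes -1, while 0 or 1 denote
   +1; higher bits are ignored, so 5 (binary 101) also denotes +1 and 7
   (binary 111) denotes -1.  Two such values combine with XOR, since sign
   flips cancel in pairs.  */
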
/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a)   ((a) == 1)

/* (a/0), with a given by low and size;
   is 1 if a=+/-1, 0 otherwise */
#define JACOBI_LS0(alow,asize) \
  (((asize) == 1 || (asize) == -1) && (alow) == 1)

/* (a/0), with a an mpz_t;
   fetch of low limb always valid, even if size is zero */
#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))

/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0U(b)   ((b) == 1)

/* (0/b), with b signed; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)

/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0LS(blow,bsize) \
  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)

/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((int) (result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  ((int) (((b) >> 1) ^ (b)))
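/* Worked example (added for illustration): for b == 7 (7 mod 8),
   (7>>1)^7 = 3^7 = 4, bit 1 clear, so +1; for b == 3 (3 mod 8),
   (3>>1)^3 = 1^3 = 2, bit 1 set, so -1 -- matching (-1)^((b^2-1)/8).  */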

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (-1/b), with b odd (signed or unsigned);
   is (-1)^((b-1)/2) */
#define JACOBI_N1B_BIT1(b) \
  ((int) (b))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1/b) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))

/* (a/b) effect due to sign of b: signed/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SS_BIT1(a, b) \
  ((((a)<0) & ((b)<0)) << 1)

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))

/* (a/b) effect due to sign of b: mpz/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1 (b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))
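/* Worked example (added for illustration): reciprocity for a == 3, b == 7,
   both 3 mod 4: 3&7 = 3, bit 1 set, so the swap to (b/a) costs a factor -1;
   for a == 5 (1 mod 4), b == 7: 5&7 = 5, bit 1 clear, so no sign change.  */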


/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   unsigned.  modexact_1_odd effectively calculates -a mod b, and
   result_bit1 is adjusted for the factor of -1.

   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   sometimes on a_size-1 means that if GMP_NUMB_BITS is odd we can't know
   what factor to introduce into result_bit1, so for that case use mpn_mod_1
   unconditionally.

   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   or not skip a divide step, or something. */
#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
  do {                                                                     \
    mp_srcptr  __a_ptr = (a_ptr);                                          \
    mp_size_t  __a_size = (a_size);                                        \
    mp_limb_t  __b = (b);                                                  \
                                                                           \
    ASSERT (__a_size >= 1);                                                \
    ASSERT (__b & 1);                                                      \
                                                                           \
    if ((GMP_NUMB_BITS % 2) != 0                                           \
        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
      {                                                                    \
        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
      }                                                                    \
  } while (0)
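
/* Illustration only (added, not part of the original header): the intended
   calling pattern, loosely modelled on the mpz Jacobi/Kronecker code.  "a"
   is assumed to be an mpz_t and "blimb" an odd limb; this is a fragment
   from inside such a routine, not a complete function.  */
#if 0
  {
    mp_limb_t  arem;
    int        result_bit1 = 0;
    JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, arem, PTR(a), ABSIZ(a), blimb);
    return mpn_jacobi_base (arem, blimb, result_bit1);
  }
#endif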


/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
   hence giving back the user's size in bits rounded up.  Notice that
   converting prec->bits->prec gives an unchanged value. */
#define __GMPF_BITS_TO_PREC(n) \
  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
#define __GMPF_PREC_TO_BITS(n) \
  ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
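/* Worked example (added for illustration): with GMP_NUMB_BITS == 32,
   __GMPF_BITS_TO_PREC(64) = (64 + 63)/32 = 3 limbs, and
   __GMPF_PREC_TO_BITS(3) = 3*32 - 32 = 64 bits, so the round trip
   prec -> bits -> prec indeed returns the starting value.  */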

extern mp_size_t __gmp_default_fp_limb_precision;


/* Set n to the number of significant digits an mpf of the given _mp_prec
   field can hold, in the given base.  This is a rounded-up value, designed
   to ensure there's enough digits to reproduce all the guaranteed part of
   the value.

   There are prec many limbs, but the high might be only "1" so forget it
   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
   further +1 is because the limbs usually won't fall on digit boundaries.

   FIXME: If base is a power of 2 and the bits per digit divides
   BITS_PER_MP_LIMB then the +2 is unnecessary.  This always happens for
   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */

#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
  do {                                                                  \
    ASSERT (base >= 2 && base < numberof (mp_bases));                   \
    (n) = 2 + (size_t) ((((prec) - 1) * BITS_PER_MP_LIMB)               \
                        * mp_bases[(base)].chars_per_bit_exactly);      \
  } while (0)
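/* Worked example (added for illustration): for base 10,
   chars_per_bit_exactly is log(2)/log(10) ~= 0.30103.  With prec == 3 and
   32-bit limbs, (3-1)*32*0.30103 ~= 19.27, truncated to 19 by the size_t
   cast, so n = 2 + 19 = 21 digits.  */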


#define DOPRNT_CONV_FIXED        1
#define DOPRNT_CONV_SCIENTIFIC   2
#define DOPRNT_CONV_GENERAL      3

#define DOPRNT_JUSTIFY_NONE      0
#define DOPRNT_JUSTIFY_LEFT      1
#define DOPRNT_JUSTIFY_RIGHT     2
#define DOPRNT_JUSTIFY_INTERNAL  3

#define DOPRNT_SHOWBASE_YES      1
#define DOPRNT_SHOWBASE_NO       2
#define DOPRNT_SHOWBASE_NONZERO  3

struct doprnt_params_t {
  int         base;          /* negative for upper case */
  int         conv;          /* choices above */
  const char  *expfmt;       /* exponent format */
  int         exptimes4;     /* exponent multiply by 4 */
  char        fill;          /* character */
  int         justify;       /* choices above */
  int         prec;          /* prec field, or -1 for all digits */
  int         showbase;      /* choices above */
  int         showpoint;     /* if radix point always shown */
  int         showtrailing;  /* if trailing zeros wanted */
  char        sign;          /* '+', ' ', or '\0' */
  int         width;         /* width field */
};

#if _GMP_H_HAVE_VA_LIST

typedef int (*doprnt_format_t) _PROTO ((void *data, const char *fmt, va_list ap));
typedef int (*doprnt_memory_t) _PROTO ((void *data, const char *str, size_t len));
typedef int (*doprnt_reps_t)   _PROTO ((void *data, int c, int reps));
typedef int (*doprnt_final_t)  _PROTO ((void *data));

struct doprnt_funs_t {
  doprnt_format_t  format;
  doprnt_memory_t  memory;
  doprnt_reps_t    reps;
  doprnt_final_t   final;   /* NULL if not required */
};

extern const struct doprnt_funs_t  __gmp_fprintf_funs;
extern const struct doprnt_funs_t  __gmp_sprintf_funs;
extern const struct doprnt_funs_t  __gmp_snprintf_funs;
extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
extern const struct doprnt_funs_t  __gmp_ostream_funs;

/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
   "size" of these have been written.  "alloc > size" is maintained, so
   there's room to store a '\0' at the end.  "result" is where the
   application wants the final block pointer.  */
struct gmp_asprintf_t {
  char    **result;
  char    *buf;
  size_t  size;
  size_t  alloc;
};

#define GMP_ASPRINTF_T_INIT(d, output)                          \
  do {                                                          \
    (d).result = (output);                                      \
    (d).alloc = 256;                                            \
    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
    (d).size = 0;                                               \
  } while (0)

/* If a realloc is necessary, use twice the size actually required, so as to
   avoid repeated small reallocs.  */
#define GMP_ASPRINTF_T_NEED(d, n)                                       \
  do {                                                                  \
    size_t  alloc, newsize, newalloc;                                   \
    ASSERT ((d)->alloc >= (d)->size + 1);                               \
                                                                        \
    alloc = (d)->alloc;                                                 \
    newsize = (d)->size + (n);                                          \
    if (alloc <= newsize)                                               \
      {                                                                 \
        newalloc = 2*newsize;                                           \
        (d)->alloc = newalloc;                                          \
        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
                                               alloc, newalloc, char);  \
      }                                                                 \
  } while (0)

__GMP_DECLSPEC int __gmp_asprintf_memory _PROTO ((struct gmp_asprintf_t *d, const char *str, size_t len));
__GMP_DECLSPEC int __gmp_asprintf_reps _PROTO ((struct gmp_asprintf_t *d, int c, int reps));
__GMP_DECLSPEC int __gmp_asprintf_final _PROTO ((struct gmp_asprintf_t *d));
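
/* Illustration only (added, not part of the original header): the intended
   lifecycle of gmp_asprintf_t, assuming the writer functions declared
   above behave as their names suggest.  */
#if 0
  {
    char                 *result;
    struct gmp_asprintf_t d;
    GMP_ASPRINTF_T_INIT (d, &result);
    __gmp_asprintf_memory (&d, "x = ", 4);
    __gmp_asprintf_reps (&d, '9', 3);   /* append "999" */
    __gmp_asprintf_final (&d);          /* '\0' terminate, set result */
    /* result now holds "x = 999", in __gmp_allocate_func memory */
  }
#endif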

/* buf is where to write the next output, and size is how much space is left
   there.  If the application passed size==0 then that's what we'll have
   here, and nothing at all should be written.  */
struct gmp_snprintf_t {
  char    *buf;
  size_t  size;
};

/* Add the bytes printed by the call to the total retval, or bail out on an
   error.  */
#define DOPRNT_ACCUMULATE(call) \
  do {                          \
    int  __ret;                 \
    __ret = call;               \
    if (__ret == -1)            \
      goto error;               \
    retval += __ret;            \
  } while (0)
#define DOPRNT_ACCUMULATE_FUN(fun, params)      \
  do {                                          \
    ASSERT ((fun) != NULL);                     \
    DOPRNT_ACCUMULATE ((*(fun)) params);        \
  } while (0)

#define DOPRNT_FORMAT(fmt, ap)                          \
  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
#define DOPRNT_MEMORY(ptr, len)                         \
  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
#define DOPRNT_REPS(c, n)                               \
  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))

#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))

#define DOPRNT_REPS_MAYBE(c, n)         \
  do {                                  \
    if ((n) != 0)                       \
      DOPRNT_REPS (c, n);               \
  } while (0)
#define DOPRNT_MEMORY_MAYBE(ptr, len)   \
  do {                                  \
    if ((len) != 0)                     \
      DOPRNT_MEMORY (ptr, len);         \
  } while (0)
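
/* Illustration only (added, not part of the original header): these helpers
   expect an enclosing function that declares "funs", "data" and "retval"
   and provides an "error:" label, as the __gmp_doprnt routines do.  A
   minimal sketch under those assumptions: */
#if 0
static int
example_output (const struct doprnt_funs_t *funs, void *data)
{
  int retval = 0;
  DOPRNT_STRING ("hello ");   /* counts bytes into retval */
  DOPRNT_REPS ('-', 5);       /* writes "-----" */
  return retval;
 error:
  return -1;                  /* reached if a writer returned -1 */
}
#endif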

__GMP_DECLSPEC int __gmp_doprnt _PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
__GMP_DECLSPEC int __gmp_doprnt_integer _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
__GMP_DECLSPEC int __gmp_doprnt_mpf _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, mpf_srcptr));
/* FIXME: should this have __GMP_DECLSPEC like the declarations above? */
int __gmp_replacement_vsnprintf _PROTO ((char *, size_t, const char *, va_list));
#endif /* _GMP_H_HAVE_VA_LIST */


typedef int (*gmp_doscan_scan_t)   _PROTO ((void *, const char *, ...));
typedef void *(*gmp_doscan_step_t) _PROTO ((void *, int));
typedef int (*gmp_doscan_get_t)    _PROTO ((void *));
typedef int (*gmp_doscan_unget_t)  _PROTO ((int, void *));

struct gmp_doscan_funs_t {
  gmp_doscan_scan_t   scan;
  gmp_doscan_step_t   step;
  gmp_doscan_get_t    get;
  gmp_doscan_unget_t  unget;
};
extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;

#if _GMP_H_HAVE_VA_LIST
int __gmp_doscan _PROTO ((const struct gmp_doscan_funs_t *, void *,
                          const char *, va_list));
#endif


/* For testing and debugging.  */
#define MPZ_CHECK_FORMAT(z)                                     \
  do {                                                          \
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
  } while (0)

#define MPQ_CHECK_FORMAT(q)                             \
  do {                                                  \
    MPZ_CHECK_FORMAT (mpq_numref (q));                  \
    MPZ_CHECK_FORMAT (mpq_denref (q));                  \
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
                                                        \
    if (SIZ(mpq_numref(q)) == 0)                        \
      {                                                 \
        /* should have zero as 0/1 */                   \
        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
                       && PTR(mpq_denref(q))[0] == 1);  \
      }                                                 \
    else                                                \
      {                                                 \
        /* should have no common factors */             \
        mpz_t  g;                                       \
        mpz_init (g);                                   \
        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
        mpz_clear (g);                                  \
      }                                                 \
  } while (0)

#define MPF_CHECK_FORMAT(f)                             \
  do {                                                  \
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
    if (SIZ(f) == 0)                                    \
      ASSERT_ALWAYS (EXP(f) == 0);                      \
    if (SIZ(f) != 0)                                    \
      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
  } while (0)


#define MPZ_PROVOKE_REALLOC(z)                  \
  do { ALLOC(z) = ABSIZ(z); } while (0)
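
/* Illustration only (added, not part of the original header): typical use
   of the check-format macros in test code, validating an mpz after an
   operation under test.  */
#if 0
static void
check_after_op (void)
{
  mpz_t z;
  mpz_init_set_ui (z, 123);
  mpz_mul (z, z, z);
  MPZ_CHECK_FORMAT (z);   /* aborts via ASSERT_ALWAYS if z is malformed */
  mpz_clear (z);
}
#endif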


#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.  */

extern mp_size_t  mul_threshold[];
extern mp_size_t  fft_modf_mul_threshold;
extern mp_size_t  sqr_threshold[];
extern mp_size_t  fft_modf_sqr_threshold;
extern mp_size_t  sb_preinv_threshold[];
extern mp_size_t  dc_threshold[];
extern mp_size_t  powm_threshold[];
extern mp_size_t  gcd_accel_threshold[];
extern mp_size_t  gcdext_threshold[];
extern mp_size_t  divrem_1_norm_threshold[];
extern mp_size_t  divrem_1_unnorm_threshold[];
extern mp_size_t  divrem_2_threshold[];
extern mp_size_t  mod_1_norm_threshold[];
extern mp_size_t  mod_1_unnorm_threshold[];
extern mp_size_t  get_str_basecase_threshold[];
extern mp_size_t  get_str_precompute_threshold[];

#undef MUL_KARATSUBA_THRESHOLD
#undef MUL_TOOM3_THRESHOLD
#undef MUL_FFT_TABLE
#undef MUL_FFT_THRESHOLD
#undef MUL_FFT_MODF_THRESHOLD
#undef SQR_BASECASE_THRESHOLD
#undef SQR_KARATSUBA_THRESHOLD
#undef SQR_TOOM3_THRESHOLD
#undef SQR_FFT_TABLE
#undef SQR_FFT_THRESHOLD
#undef SQR_FFT_MODF_THRESHOLD
#undef DIV_DC_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef GET_STR_DC_THRESHOLD
#undef GET_STR_PRECOMPUTE_THRESHOLD

#define MUL_KARATSUBA_THRESHOLD       mul_threshold[0]
#define MUL_TOOM3_THRESHOLD           mul_threshold[1]
#define MUL_FFT_TABLE                 { 0 }
#define MUL_FFT_THRESHOLD             mul_threshold[2]
#define MUL_FFT_MODF_THRESHOLD        fft_modf_mul_threshold
#define SQR_BASECASE_THRESHOLD        sqr_threshold[0]
#define SQR_KARATSUBA_THRESHOLD       sqr_threshold[1]
#define SQR_TOOM3_THRESHOLD           sqr_threshold[2]
#define SQR_FFT_TABLE                 { 0 }
#define SQR_FFT_THRESHOLD             sqr_threshold[3]
#define SQR_FFT_MODF_THRESHOLD        fft_modf_sqr_threshold
#define DIV_DC_THRESHOLD              dc_threshold[0]
#define POWM_THRESHOLD                powm_threshold[0]
#define GCD_ACCEL_THRESHOLD           gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD              gcdext_threshold[0]
#define DIVREM_1_NORM_THRESHOLD       divrem_1_norm_threshold[0]
#define DIVREM_1_UNNORM_THRESHOLD     divrem_1_unnorm_threshold[0]
#define MOD_1_NORM_THRESHOLD          mod_1_norm_threshold[0]
#define MOD_1_UNNORM_THRESHOLD        mod_1_unnorm_threshold[0]
#define GET_STR_DC_THRESHOLD          get_str_basecase_threshold[0]
#define GET_STR_PRECOMPUTE_THRESHOLD  get_str_precompute_threshold[0]

#if ! UDIV_PREINV_ALWAYS
#undef DIV_SB_PREINV_THRESHOLD
#undef DIVREM_2_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD  sb_preinv_threshold[0]
#define DIVREM_2_THRESHOLD       divrem_2_threshold[0]
#endif

/* Sizes the tune program tests up to, used in a couple of recompilations.  */
#define SQR_KARATSUBA_MAX_GENERIC  200
#define MUL_TOOM3_THRESHOLD_LIMIT  700
#define GET_STR_THRESHOLD_LIMIT    500

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#if TUNE_PROGRAM_BUILD_SQR
#undef SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD  SQR_KARATSUBA_MAX_GENERIC
#endif

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif


#ifdef __cplusplus

/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown.  */
class gmp_allocated_string {
 public:
  char *str;
  gmp_allocated_string(char *arg) { str = arg; }
  ~gmp_allocated_string() { (*__gmp_free_func) (str, strlen(str)+1); }
};

int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;

#endif /* __cplusplus */

#endif /* __GMP_IMPL_H__ */