source: trunk/third/gmp/tests/refmpn.c @ 22254

/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.

Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */


/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside these ranges (eg. for size==0), but the assertions are present to
   pick up bad parameters passed here that are about to be passed the same
   to a real mpn routine being compared.  */

/* always do assertion checking */
#define WANT_ASSERT  1

#include <stdio.h>  /* for NULL */
#include <stdlib.h> /* for malloc */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "tests.h"



/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
   in bytes. */
int
byte_overlap_p (const void *v_xp, mp_size_t xsize,
                const void *v_yp, mp_size_t ysize)
{
  const char *xp = v_xp;
  const char *yp = v_yp;

  ASSERT (xsize >= 0);
  ASSERT (ysize >= 0);

  /* no wraparounds */
  ASSERT (xp+xsize >= xp);
  ASSERT (yp+ysize >= yp);

  if (xp + xsize <= yp)
    return 0;

  if (yp + ysize <= xp)
    return 0;

  return 1;
}

/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
int
refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
{
  return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
                         yp, ysize * BYTES_PER_MP_LIMB);
}

/* Check overlap for a routine defined to work low to high. */
int
refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a routine defined to work high to low. */
int
refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a standard routine requiring equal or separate. */
int
refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
}
int
refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                               mp_size_t size)
{
  return (refmpn_overlap_fullonly_p (dst, src1, size)
          && refmpn_overlap_fullonly_p (dst, src2, size));
}
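
/* Illustrative sketch, not part of the original file: "fullonly" accepts
   dst==src exactly or fully disjoint operands, and rejects partial overlap.
   The limb counts here are arbitrary examples.  Compiled out by default. */
#if 0
static void
example_overlap_checks (void)
{
  mp_limb_t  a[4], b[4];

  ASSERT (refmpn_overlap_fullonly_p (a, a, 4));      /* same region: ok */
  ASSERT (refmpn_overlap_fullonly_p (a, b, 4));      /* disjoint: ok */
  ASSERT (! refmpn_overlap_fullonly_p (a, a+1, 3));  /* partial: rejected */
}
#endif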


mp_ptr
refmpn_malloc_limbs (mp_size_t size)
{
  mp_ptr  p;
  ASSERT (size >= 0);
  if (size == 0)
    size = 1;
  p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
  ASSERT (p != NULL);
  return p;
}

mp_ptr
refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
{
  mp_ptr  p;
  p = refmpn_malloc_limbs (size);
  refmpn_copyi (p, ptr, size);
  return p;
}

/* malloc n limbs on a multiple of m bytes boundary */
mp_ptr
refmpn_malloc_limbs_aligned (size_t n, size_t m)
{
  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
}


void
refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
{
  mp_size_t  i;
  ASSERT (size >= 0);
  for (i = 0; i < size; i++)
    ptr[i] = value;
}

void
refmpn_zero (mp_ptr ptr, mp_size_t size)
{
  refmpn_fill (ptr, size, CNST_LIMB(0));
}

int
refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
{
  mp_size_t  i;
  for (i = 0; i < size; i++)
    if (ptr[i] != 0)
      return 0;
  return 1;
}

/* the highest one bit in x */
mp_limb_t
refmpn_msbone (mp_limb_t x)
{
  mp_limb_t  n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);

  while (n != 0)
    {
      if (x & n)
        break;
      n >>= 1;
    }
  return n;
}

/* a mask of the highest one bit and all bits below */
mp_limb_t
refmpn_msbone_mask (mp_limb_t x)
{
  if (x == 0)
    return 0;

  return (refmpn_msbone (x) << 1) - 1;
}


void
refmpn_setbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
}

void
refmpn_clrbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
}

#define REFMPN_TSTBIT(ptr,bit) \
  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)

int
refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
{
  return REFMPN_TSTBIT (ptr, bit);
}

unsigned long
refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) != 0)
    bit++;
  return bit;
}

unsigned long
refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) == 0)
    bit++;
  return bit;
}

void
refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  refmpn_copyi (rp, sp, size);
}

void
refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    rp[i] = sp[i];
}

void
refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = size-1; i >= 0; i--)
    rp[i] = sp[i];
}

void
refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    rp[i] = sp[i] ^ GMP_NUMB_MASK;
}


int
refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (size >= 1);
  ASSERT_MPN (xp, size);
  ASSERT_MPN (yp, size);

  for (i = size-1; i >= 0; i--)
    {
      if (xp[i] > yp[i])  return 1;
      if (xp[i] < yp[i])  return -1;
    }
  return 0;
}

int
refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  if (size == 0)
    return 0;
  else
    return refmpn_cmp (xp, yp, size);
}

int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                     mp_srcptr yp, mp_size_t ysize)
{
  int  opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}

int
refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t  i;
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
      if (xp[i] != yp[i])
        return 0;
  return 1;
}


#define LOGOPS(operation)                                               \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      rp[i] = operation;                                                \
  }

void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}

/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  *w = sum;
  cy = (sum < x);
#else
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}

/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  *w = diff;
  cy = (diff > x);
#else
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}

/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
mp_limb_t
adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t  r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = add (w, x, y);
  return r + add (w, *w, c);
}

/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
mp_limb_t
sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t  r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = sub (w, x, y);
  return r + sub (w, *w, c);
}


#define AORS_1(operation)                               \
  {                                                     \
    mp_limb_t  i;                                       \
                                                        \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
    ASSERT (size >= 1);                                 \
    ASSERT_MPN (sp, size);                              \
    ASSERT_LIMB (n);                                    \
                                                        \
    for (i = 0; i < size; i++)                          \
      n = operation (&rp[i], sp[i], n);                 \
    return n;                                           \
  }

mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (add);
}
mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (sub);
}

#define AORS_NC(operation)                                              \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
    return carry;                                                       \
  }

mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (adc);
}
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (sbb);
}


mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}


/* Twos complement, return borrow. */
mp_limb_t
refmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr     zeros;
  mp_limb_t  ret;

  ASSERT (size >= 1);

  zeros = refmpn_malloc_limbs (size);
  refmpn_fill (zeros, size, CNST_LIMB(0));
  ret = refmpn_sub_n (dst, zeros, src, size);
  free (zeros);
  return ret;
}


#define AORS(aors_n, aors_1)                                    \
  {                                                             \
    mp_limb_t  c;                                               \
    ASSERT (s1size >= s2size);                                  \
    ASSERT (s2size >= 1);                                       \
    c = aors_n (rp, s1p, s2p, s2size);                          \
    if (s1size-s2size != 0)                                     \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
    return c;                                                   \
  }
mp_limb_t
refmpn_add (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
mp_limb_t
refmpn_sub (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}


#define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
#define SHIFTLOW(x)  ((x) >> BITS_PER_MP_LIMB/2)

#define LOWMASK   (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)

/* Return the high limb of x*y and store the low limb at *lo, using full
   limbs not nails. */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  hi, s;

  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  return hi;
}

mp_limb_t
refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
{
  return refmpn_umul_ppmm (lo, x, y);
}
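
/* Illustrative check, not part of the original file: the half-limb
   schoolbook decomposition above computes, for b = 2^(BITS_PER_MP_LIMB/2),
       x*y = xh*yh*b^2 + (xh*yl + xl*yh)*b + xl*yl
   The sketch below, compiled out by default, compares one product against
   the umul_ppmm macro from longlong.h. */
#if 0
static void
example_umul_ppmm_check (void)
{
  mp_limb_t  x = CNST_LIMB(0x12345678), y = MP_LIMB_T_MAX;
  mp_limb_t  hi, lo, want_hi, want_lo;

  hi = refmpn_umul_ppmm (&lo, x, y);
  umul_ppmm (want_hi, want_lo, x, y);   /* from longlong.h */
  ASSERT (hi == want_hi && lo == want_lo);
}
#endif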

mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
               mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      lo >>= GMP_NAIL_BITS;
      ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}

mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}


mp_limb_t
refmpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult)
{
  mp_ptr     src_copy;
  mp_limb_t  c;

  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+1, mult, 2));
  ASSERT (size >= 1);
  ASSERT_MPN (mult, 2);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
  c = refmpn_addmul_1 (dst+1, src, size, mult[1]);
  free (src_copy);
  return c;
}


#define AORSMUL_1C(operation_n)                                 \
  {                                                             \
    mp_ptr     p;                                               \
    mp_limb_t  ret;                                             \
                                                                \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
                                                                \
    p = refmpn_malloc_limbs (size);                             \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
    ret += operation_n (rp, rp, p, size);                       \
                                                                \
    free (p);                                                   \
    return ret;                                                 \
  }

mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}
mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}


mp_limb_t
refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
mp_limb_t
refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}


mp_limb_t
refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
                  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                  mp_limb_t carry)
{
  mp_ptr p;
  mp_limb_t acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* in case r1p==s1p or r1p==s2p */
  p = refmpn_malloc_limbs (size);

  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);

  free (p);
  return 2 * acy + scy;
}

mp_limb_t
refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
                 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
}


/* Right shift hi,lo and return the low limb of the result.
   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
mp_limb_t
rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return lo;
  else
    return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
}

/* Left shift hi,lo and return the high limb of the result.
   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
mp_limb_t
lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return hi;
  else
    return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
}


mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = rshift_make (sp[0], CNST_LIMB(0), shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  return ret;
}

mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  return ret;
}


mp_limb_t
refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyi (rp, sp, size);
      return 0;
    }
  else
    {
      return refmpn_rshift (rp, sp, size, shift);
    }
}

mp_limb_t
refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyd (rp, sp, size);
      return 0;
    }
  else
    {
      return refmpn_lshift (rp, sp, size, shift);
    }
}
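
/* Illustrative check, not part of the original file: lshift returns the
   bits shifted out the top, so for 1 <= shift < GMP_NUMB_BITS an lshift
   followed by an rshift restores the operand, with the two return values
   carrying the displaced bits.  Compiled out by default. */
#if 0
static void
example_shift_roundtrip (void)
{
  mp_limb_t  a[2] = { CNST_LIMB(123), CNST_LIMB(456) };
  mp_limb_t  t[2], r[2], out;

  out = refmpn_lshift (t, a, 2, 5);
  ASSERT (out == 0);                   /* top 5 bits of a[1] were clear */
  refmpn_rshift (r, t, 2, 5);
  ASSERT (refmpn_cmp (r, a, 2) == 0);  /* round trip restores a */
}
#endif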


/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r;
  int  n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  n = refmpn_count_leading_zeros (d);
  d <<= n;

  if (n != 0)
    {
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  r >>= n;
  *rp = r;
  return q;
}

mp_limb_t
refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
{
  return refmpn_udiv_qrnnd (rp, h, l, d);
}
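
/* Illustrative check, not part of the original file: the defining identity
   is h*b + l == q*d + r with 0 <= r < d, where b = 2^GMP_LIMB_BITS.  It can
   be verified by multiplying back with refmpn_umul_ppmm.  Compiled out by
   default; assumes GMP_NAIL_BITS == 0 so limbs are full words. */
#if 0
static void
example_udiv_qrnnd_check (mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r, phi, plo;

  ASSERT (h < d);
  q = refmpn_udiv_qrnnd (&r, h, l, d);

  phi = refmpn_umul_ppmm (&plo, q, d);  /* q*d */
  phi += add (&plo, plo, r);            /* + r, carry into high limb */
  ASSERT (phi == h && plo == l && r < d);
}
#endif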

/* This little subroutine avoids some bad code generation from i386 gcc 3.0
   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
static mp_limb_t
refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                             mp_limb_t divisor, mp_limb_t carry)
{
  mp_size_t  i;
  for (i = size-1; i >= 0; i--)
    {
      rp[i] = refmpn_udiv_qrnnd (&carry, carry,
                                 sp[i] << GMP_NAIL_BITS,
                                 divisor << GMP_NAIL_BITS);
      carry >>= GMP_NAIL_BITS;
    }
  return carry;
}

mp_limb_t
refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t divisor, mp_limb_t carry)
{
  mp_ptr     sp_orig;
  mp_ptr     prod;
  mp_limb_t  carry_orig;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 0);
  ASSERT (carry < divisor);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (divisor);
  ASSERT_LIMB (carry);

  if (size == 0)
    return carry;

  sp_orig = refmpn_memdup_limbs (sp, size);
  prod = refmpn_malloc_limbs (size);
  carry_orig = carry;

  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);

  /* check by multiplying back */
#if 0
  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
          size, divisor, carry_orig, carry);
  mpn_trace("s",sp_orig,size);
  mpn_trace("r",rp,size);
  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
  mpn_trace("p",prod,size);
#endif
  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
  free (sp_orig);
  free (prod);

  return carry;
}

mp_limb_t
refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
}


mp_limb_t
refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
               mp_limb_t carry)
{
  mp_ptr  p = refmpn_malloc_limbs (size);
  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  free (p);
  return carry;
}

mp_limb_t
refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                     mp_limb_t inverse)
{
  ASSERT (divisor & GMP_NUMB_HIGHBIT);
  ASSERT (inverse == refmpn_invert_limb (divisor));
  return refmpn_mod_1 (sp, size, divisor);
}

/* This implementation will be rather slow, but has the advantage of being
   in a different style than the libgmp versions.  */
mp_limb_t
refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
{
  ASSERT ((GMP_NUMB_BITS % 4) == 0);
  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
}


mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                  mp_limb_t carry)
{
  mp_ptr  z;

  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, CNST_LIMB(0));

  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}

mp_limb_t
refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
                        mp_srcptr sp, mp_size_t size,
                        mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
{
  ASSERT (size >= 0);
  ASSERT (shift == refmpn_count_leading_zeros (divisor));
  ASSERT (inverse == refmpn_invert_limb (divisor << shift));

  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
}


/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
   paper, figure 8.1 m', where b=2^BITS_PER_MP_LIMB.  Note that -d-1 < d
   since d has the high bit set. */

mp_limb_t
refmpn_invert_limb (mp_limb_t d)
{
  mp_limb_t r;
  ASSERT (d & GMP_LIMB_HIGHBIT);
  return refmpn_udiv_qrnnd (&r, -d-1, -1, d);
}
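
/* Illustrative check, not part of the original file: the returned m
   satisfies b + m == floor((b^2 - 1)/d), so (b + m)*d == b^2 - 1 - r for
   some 0 <= r < d, ie. the product's high limb is b-1 and its low limb is
   at least b-d.  Compiled out by default; assumes GMP_NAIL_BITS == 0. */
#if 0
static void
example_invert_limb_check (mp_limb_t d)
{
  mp_limb_t  m, hi, lo;

  ASSERT (d & GMP_LIMB_HIGHBIT);
  m = refmpn_invert_limb (d);

  hi = refmpn_umul_ppmm (&lo, m, d);  /* m*d */
  hi += d;                            /* + b*d adds d to the high limb */
  ASSERT (hi == MP_LIMB_T_MAX);       /* high limb of b^2-1-r is b-1 */
  ASSERT (lo >= -d);                  /* low limb is b-1-r >= b-d */
}
#endif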


/* The aim is to produce a dst quotient and return a remainder c, satisfying
   c*b^n + src-i == 3*dst, where i is the incoming carry.

   Some value c==0, c==1 or c==2 will satisfy, so just try each.

   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1 mod 3 and a non-zero
   remainder from the first division attempt determines the correct
   remainder (3-c), but don't bother with that, since we can't guarantee
   anything about GMP_NUMB_BITS when using nails.

   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
   complement negative, ie. b^n+src-i, and the calculation produces c1
   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
   means it's enough to just add any borrow back at the end.

   A borrow only occurs when src==0 or src==1, and, by the same reasoning
   as in mpn/generic/diveby3.c, the c1 that results in those cases will
   only be 0 or 1 respectively, so with 1 added the final return value is
   still in the prescribed range 0 to 2. */

mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr     spcopy;
  mp_limb_t  c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, 3, c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  c += cs;
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}

mp_limb_t
refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
}
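
/* Illustrative check, not part of the original file: the defining relation
   c*b^n + src == 3*dst (zero incoming carry) can be confirmed by
   multiplying the quotient back by 3; refmpn_mul_1 returns the high limb,
   which must reproduce c.  Compiled out by default. */
#if 0
static void
example_divexact_by3_check (mp_srcptr sp, mp_size_t size)
{
  mp_ptr     qp = refmpn_malloc_limbs (size);
  mp_ptr     tp = refmpn_malloc_limbs (size);
  mp_limb_t  c;

  c = refmpn_divexact_by3 (qp, sp, size);

  /* 3*qp == c*b^n + sp */
  ASSERT (refmpn_mul_1 (tp, qp, size, CNST_LIMB(3)) == c);
  ASSERT (refmpn_cmp (tp, sp, size) == 0);

  free (qp);
  free (tp);
}
#endif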


/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
void
refmpn_mul_basecase (mp_ptr prodp,
                     mp_srcptr up, mp_size_t usize,
                     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t i;

  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}
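
/* Illustrative usage, not part of the original file: the product of a
   usize limb operand and a vsize limb operand occupies usize+vsize limbs
   at prodp, so a 1x1 case reduces to refmpn_umul_ppmm.  Compiled out by
   default; assumes GMP_NAIL_BITS == 0 so the two agree exactly. */
#if 0
static void
example_mul_basecase_1x1 (mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  prod[2], hi, lo;

  refmpn_mul_basecase (prod, &x, 1, &y, 1);
  hi = refmpn_umul_ppmm (&lo, x, y);
  ASSERT (prod[1] == hi && prod[0] == lo);
}
#endif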

void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  refmpn_mul_basecase (prodp, up, size, vp, size);
}

void
refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mul_basecase (dst, src, size, src, size);
}

/* Allowing usize<vsize, usize==0 or vsize==0. */
void
refmpn_mul_any (mp_ptr prodp,
                     mp_srcptr up, mp_size_t usize,
                     mp_srcptr vp, mp_size_t vsize)
{
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= 0);
  ASSERT (vsize >= 0);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  if (usize == 0)
    {
      refmpn_fill (prodp, vsize, CNST_LIMB(0));
      return;
    }

  if (vsize == 0)
    {
      refmpn_fill (prodp, usize, CNST_LIMB(0));
      return;
    }

  if (usize >= vsize)
    refmpn_mul_basecase (prodp, up, usize, vp, vsize);
  else
    refmpn_mul_basecase (prodp, vp, vsize, up, usize);
}


mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t  x;
  int  twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));
  ASSERT_MPN (xp, xsize);
  ASSERT_LIMB (y);

  x = refmpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;

  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  for (;;)
    {
      while ((x & 1) == 0)  x >>= 1;
      while ((y & 1) == 0)  y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}


/* Based on the full limb x, not nails. */
unsigned
refmpn_count_leading_zeros (mp_limb_t x)
{
  unsigned  n = 0;

  ASSERT (x != 0);

  while ((x & GMP_LIMB_HIGHBIT) == 0)
    {
      x <<= 1;
      n++;
    }
  return n;
}

/* Full limbs allowed, not limited to nails. */
unsigned
refmpn_count_trailing_zeros (mp_limb_t x)
{
  unsigned  n = 0;

  ASSERT (x != 0);
  ASSERT_LIMB (x);

  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

/* Strip factors of two (low zero bits) from {p,size} by right shifting.
   The return value is the number of twos stripped.  */
mp_size_t
refmpn_strip_twos (mp_ptr p, mp_size_t size)
{
  mp_size_t  limbs;
  unsigned   shift;

  ASSERT (size >= 1);
  ASSERT (! refmpn_zero_p (p, size));
  ASSERT_MPN (p, size);

  for (limbs = 0; p[0] == 0; limbs++)
    {
      refmpn_copyi (p, p+1, size-1);
      p[size-1] = 0;
    }

  shift = refmpn_count_trailing_zeros (p[0]);
  if (shift)
    refmpn_rshift (p, p, size, shift);

  return limbs*GMP_NUMB_BITS + shift;
}

mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int       cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;
}


unsigned long
refmpn_popcount (mp_srcptr sp, mp_size_t size)
{
  unsigned long  count = 0;
  mp_size_t  i;
  int        j;
  mp_limb_t  l;

  ASSERT (size >= 0);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    {
      l = sp[i];
      for (j = 0; j < GMP_NUMB_BITS; j++)
        {
          count += (l & 1);
          l >>= 1;
        }
    }
  return count;
}

unsigned long
refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr  d;
  unsigned long  count;

  ASSERT (size >= 0);
  ASSERT_MPN (s1p, size);
  ASSERT_MPN (s2p, size);

  if (size == 0)
    return 0;

  d = refmpn_malloc_limbs (size);
  refmpn_xor_n (d, s1p, s2p, size);
  count = refmpn_popcount (d, size);
  free (d);
  return count;
}


/* set r to a%d */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t  D[2];
  int        n;

  ASSERT (! refmpn_overlap_p (r, 2, d, 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, 2) <= 0)
        break;
      refmpn_lshift (D, D, 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  while (n >= 0)
    {
      if (refmpn_cmp (r, D, 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, 2));
      refmpn_rshift (D, D, 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, 2) < 0);
}


/* Find n where 0<n<2^GMP_NUMB_BITS and n==c mod m */
mp_limb_t
refmpn_gcd_finda (const mp_limb_t c[2])
{
  mp_limb_t  n1[2], n2[2];

  ASSERT (c[0] != 0);
  ASSERT (c[1] != 0);
  ASSERT_MPN (c, 2);

  n1[0] = c[0];
  n1[1] = c[1];

  n2[0] = -n1[0];
  n2[1] = ~n1[1];

  while (n2[1] != 0)
    {
      refmpn_mod2 (n1, n1, n2);

      MP_LIMB_T_SWAP (n1[0], n2[0]);
      MP_LIMB_T_SWAP (n1[1], n2[1]);
    }

  return n2[0];
}


/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big. */
mp_limb_t
refmpn_sb_divrem_mn (mp_ptr qp,
                     mp_ptr np, mp_size_t nsize,
                     mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t  retval = 0;
  mp_size_t  i;
  mp_limb_t  d1 = dp[dsize-1];
  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_LIMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  for (i--; i >= 0; i--)
    {
      mp_limb_t  n0 = np[i+dsize];
      mp_limb_t  n1 = np[i+dsize-1];
      mp_limb_t  q, dummy_r;

      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = MP_LIMB_T_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1, d1);

      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT (refmpn_add_n (np+i, np+i, dp, dsize) != 0);
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);

  /* multiply back to original */
  {
    mp_ptr  mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}

/* The same operation as mpn_tdiv_qr: divide {np,nsize} by {dp,dsize},
   storing the quotient (nsize-dsize+1 limbs) at qp and the remainder
   (dsize limbs) at rp.  Only qxn==0 is supported. */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  if (dsize == 1)
    {
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr  d2p = refmpn_malloc_limbs (dsize);
      int     norm = refmpn_count_leading_zeros (dp[dsize-1]);

      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}

size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char  *d;
  size_t  dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (__mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* at most one leading zero */
  ASSERT (d == dst || d == dst+1);
  if (d != dst)
    *dst = 0;

  if (POW2_P (base))
    free (src);

  return dsize;
}


mp_limb_t
refmpn_bswap_limb (mp_limb_t src)
{
  mp_limb_t  dst;
  int        i;

  dst = 0;
  for (i = 0; i < BYTES_PER_MP_LIMB; i++)
    {
      dst = (dst << 8) + (src & 0xFF);
      src >>= 8;
    }
  return dst;
}


/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times.  */

#if BITS_PER_MP_LIMB == 32
#define RAND_A  CNST_LIMB(0x29CF535)
#endif
#if BITS_PER_MP_LIMB == 64
#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
#endif

mp_limb_t  refmpn_random_seed;

mp_limb_t
refmpn_random_half (void)
{
  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  return (refmpn_random_seed >> BITS_PER_MP_LIMB/2);
}

mp_limb_t
refmpn_random_limb (void)
{
  return ((refmpn_random_half () << (BITS_PER_MP_LIMB/2))
           | refmpn_random_half ()) & GMP_NUMB_MASK;
}

void
refmpn_random (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  if (GMP_NAIL_BITS == 0)
    {
      mpn_random (ptr, size);
      return;
    }

  for (i = 0; i < size; i++)
    ptr[i] = refmpn_random_limb ();
}

void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  mp_limb_t  bit, mask, limb;
  int        run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

#define RUN_MODULUS  32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;
    }
}