source: trunk/third/gmp/mpz/powm.c @ 18191

Revision 18191, 10.8 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18190, which included commits to RCS files with non-trunk default branches.
Line 
1/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
2
3Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
4Foundation, Inc.  Contributed by Paul Zimmermann.
5
6This file is part of the GNU MP Library.
7
8The GNU MP Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 2.1 of the License, or (at your
11option) any later version.
12
13The GNU MP Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
20the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21MA 02111-1307, USA. */
22
23
24#include "gmp.h"
25#include "gmp-impl.h"
26#include "longlong.h"
27#ifdef BERKELEY_MP
28#include "mp.h"
29#endif
30
31
32/* Set c <- tp/R^n mod m.
33   tp should have space for 2*n+1 limbs; clobber its most significant limb. */
34#if ! WANT_REDC_GLOBAL
35static
36#endif
37void
38redc (mp_ptr cp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim, mp_ptr tp)
39{
40  mp_limb_t cy;
41  mp_limb_t q;
42  mp_size_t j;
43
44  tp[2 * n] = 0;                /* carry guard */
45
46  for (j = 0; j < n; j++)
47    {
48      q = tp[0] * Nprim;
49      cy = mpn_addmul_1 (tp, mp, n, q);
50      mpn_incr_u (tp + n, cy);
51      tp++;
52    }
53
54  if (tp[n] != 0)
55    mpn_sub_n (cp, tp, mp, n);
56  else
57    MPN_COPY (cp, tp, n);
58}
59
60/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
61   t is defined by (tp,mn).  */
62static void
63reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
64{
65  mp_ptr qp;
66  TMP_DECL (marker);
67
68  TMP_MARK (marker);
69  qp = TMP_ALLOC_LIMBS (an - mn + 1);
70
71  mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
72
73  TMP_FREE (marker);
74}
75
76#if REDUCE_EXPONENT
77/* Return the group order of the ring mod m.  */
78static mp_limb_t
79phi (mp_limb_t t)
80{
81  mp_limb_t d, m, go;
82
83  go = 1;
84
85  if (t % 2 == 0)
86    {
87      t = t / 2;
88      while (t % 2 == 0)
89        {
90          go *= 2;
91          t = t / 2;
92        }
93    }
94  for (d = 3;; d += 2)
95    {
96      m = d - 1;
97      for (;;)
98        {
99          unsigned long int q = t / d;
100          if (q < d)
101            {
102              if (t <= 1)
103                return go;
104              if (t == d)
105                return go * m;
106              return go * (t - 1);
107            }
108          if (t != q * d)
109            break;
110          go *= m;
111          m = d;
112          t = q;
113        }
114    }
115}
116#endif
117
118/* average number of calls to redc for an exponent of n bits
119   with the sliding window algorithm of base 2^k: the optimal is
120   obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
121
122   n\k    4     5     6     7     8
123   128    156*  159   171   200   261
124   256    309   307*  316   343   403
125   512    617   607*  610   632   688
126   1024   1231  1204  1195* 1207  1256
127   2048   2461  2399  2366  2360* 2396
128   4096   4918  4787  4707  4665* 4670
129*/
130
131
132/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.  In REDC
133   each modular multiplication costs about 2*n^2 limbs operations, whereas
134   using usual reduction it costs 3*K(n), where K(n) is the cost of a
135   multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
136   for example using Burnikel-Ziegler's algorithm. This gives a theoretical
137   threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
138   2.66.  */
139/* For now, also disable REDC when MOD is even, as the inverse can't handle
140   that.  At some point, we might want to make the code faster for that case,
141   perhaps using CRR.  */
142
143#ifndef POWM_THRESHOLD
144#define POWM_THRESHOLD  ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
145#endif
146
147#define HANDLE_NEGATIVE_EXPONENT 1
148#undef REDUCE_EXPONENT
149
150void
151#ifndef BERKELEY_MP
152mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
153#else /* BERKELEY_MP */
154pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
155#endif /* BERKELEY_MP */
156{
157  mp_ptr xp, tp, qp, gp, this_gp;
158  mp_srcptr bp, ep, mp;
159  mp_size_t bn, es, en, mn, xn;
160  mp_limb_t invm, c;
161  unsigned long int enb;
162  mp_size_t i, K, j, l, k;
163  int m_zero_cnt, e_zero_cnt;
164  int sh;
165  int use_redc;
166#if HANDLE_NEGATIVE_EXPONENT
167  mpz_t new_b;
168#endif
169#if REDUCE_EXPONENT
170  mpz_t new_e;
171#endif
172  TMP_DECL (marker);
173
174  mp = PTR(m);
175  mn = ABSIZ (m);
176  if (mn == 0)
177    DIVIDE_BY_ZERO;
178
179  TMP_MARK (marker);
180
181  es = SIZ (e);
182  if (es <= 0)
183    {
184      if (es == 0)
185        {
186          /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
187             m equals 1.  */
188          SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
189          PTR(r)[0] = 1;
190          TMP_FREE (marker);    /* we haven't really allocated anything here */
191          return;
192        }
193#if HANDLE_NEGATIVE_EXPONENT
194      MPZ_TMP_INIT (new_b, mn + 1);
195
196      if (! mpz_invert (new_b, b, m))
197        DIVIDE_BY_ZERO;
198      b = new_b;
199      es = -es;
200#else
201      DIVIDE_BY_ZERO;
202#endif
203    }
204  en = es;
205
206#if REDUCE_EXPONENT
207  /* Reduce exponent by dividing it by phi(m) when m small.  */
208  if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
209    {
210      MPZ_TMP_INIT (new_e, 2);
211      mpz_mod_ui (new_e, e, phi (mp[0]));
212      e = new_e;
213    }
214#endif
215
216  use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
217  if (use_redc)
218    {
219      /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
220      modlimb_invert (invm, mp[0]);
221      invm = -invm;
222    }
223  else
224    {
225      /* Normalize m (i.e. make its most significant bit set) as required by
226         division functions below.  */
227      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
228      m_zero_cnt -= GMP_NAIL_BITS;
229      if (m_zero_cnt != 0)
230        {
231          mp_ptr new_mp;
232          new_mp = TMP_ALLOC_LIMBS (mn);
233          mpn_lshift (new_mp, mp, mn, m_zero_cnt);
234          mp = new_mp;
235        }
236    }
237
238  /* Determine optimal value of k, the number of exponent bits we look at
239     at a time.  */
240  count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
241  e_zero_cnt -= GMP_NAIL_BITS;
242  enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
243  k = 1;
244  K = 2;
245  while (2 * enb > K * (2 + k * (3 + k)))
246    {
247      k++;
248      K *= 2;
249    }
250
251  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
252  qp = TMP_ALLOC_LIMBS (mn + 1);
253
254  gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
255
256  /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
257  bn = ABSIZ (b);
258  bp = PTR(b);
259  /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
260     for speed or correctness to do this when b and m have the same number of
261     limbs, perhaps remove mpn_cmp call.  */
262  if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
263    {
264      /* Reduce possibly huge base while moving it to gp[0].  Use a function
265         call to reduce, since we don't want the quotient allocation to
266         live until function return.  */
267      if (use_redc)
268        {
269          reduce (tp + mn, bp, bn, mp, mn);     /* b mod m */
270          MPN_ZERO (tp, mn);
271          mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
272        }
273      else
274        {
275          reduce (gp, bp, bn, mp, mn);
276        }
277    }
278  else
279    {
280      /* |b| < m.  We pad out operands to become mn limbs,  which simplifies
281         the rest of the function, but slows things down when the |b| << m.  */
282      if (use_redc)
283        {
284          MPN_ZERO (tp, mn);
285          MPN_COPY (tp + mn, bp, bn);
286          MPN_ZERO (tp + mn + bn, mn - bn);
287          mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
288        }
289      else
290        {
291          MPN_COPY (gp, bp, bn);
292          MPN_ZERO (gp + bn, mn - bn);
293        }
294    }
295
296  /* Compute xx^i for odd g < 2^i.  */
297
298  xp = TMP_ALLOC_LIMBS (mn);
299  mpn_sqr_n (tp, gp, mn);
300  if (use_redc)
301    redc (xp, mp, mn, invm, tp);                /* xx = x^2*R^n */
302  else
303    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
304  this_gp = gp;
305  for (i = 1; i < K / 2; i++)
306    {
307      mpn_mul_n (tp, this_gp, xp, mn);
308      this_gp += mn;
309      if (use_redc)
310        redc (this_gp, mp, mn, invm, tp);       /* g[i] = x^(2i+1)*R^n */
311      else
312        mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
313    }
314
315  /* Start the real stuff.  */
316  ep = PTR (e);
317  i = en - 1;                           /* current index */
318  c = ep[i];                            /* current limb */
319  sh = GMP_NUMB_BITS - e_zero_cnt;      /* significant bits in ep[i] */
320  sh -= k;                              /* index of lower bit of ep[i] to take into account */
321  if (sh < 0)
322    {                                   /* k-sh extra bits are needed */
323      if (i > 0)
324        {
325          i--;
326          c <<= (-sh);
327          sh += GMP_NUMB_BITS;
328          c |= ep[i] >> sh;
329        }
330    }
331  else
332    c >>= sh;
333
334  for (j = 0; c % 2 == 0; j++)
335    c >>= 1;
336
337  MPN_COPY (xp, gp + mn * (c >> 1), mn);
338  while (--j >= 0)
339    {
340      mpn_sqr_n (tp, xp, mn);
341      if (use_redc)
342        redc (xp, mp, mn, invm, tp);
343      else
344        mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
345    }
346
347  while (i > 0 || sh > 0)
348    {
349      c = ep[i];
350      l = k;                            /* number of bits treated */
351      sh -= l;
352      if (sh < 0)
353        {
354          if (i > 0)
355            {
356              i--;
357              c <<= (-sh);
358              sh += GMP_NUMB_BITS;
359              c |= ep[i] >> sh;
360            }
361          else
362            {
363              l += sh;                  /* last chunk of bits from e; l < k */
364            }
365        }
366      else
367        c >>= sh;
368      c &= ((mp_limb_t) 1 << l) - 1;
369
370      /* This while loop implements the sliding window improvement--loop while
371         the most significant bit of c is zero, squaring xx as we go. */
372      while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
373        {
374          mpn_sqr_n (tp, xp, mn);
375          if (use_redc)
376            redc (xp, mp, mn, invm, tp);
377          else
378            mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
379          if (sh != 0)
380            {
381              sh--;
382              c = (c << 1) + ((ep[i] >> sh) & 1);
383            }
384          else
385            {
386              i--;
387              sh = GMP_NUMB_BITS - 1;
388              c = (c << 1) + (ep[i] >> sh);
389            }
390        }
391
392      /* Replace xx by xx^(2^l)*x^c.  */
393      if (c != 0)
394        {
395          for (j = 0; c % 2 == 0; j++)
396            c >>= 1;
397
398          /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
399          l -= j;
400          while (--l >= 0)
401            {
402              mpn_sqr_n (tp, xp, mn);
403              if (use_redc)
404                redc (xp, mp, mn, invm, tp);
405              else
406                mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
407            }
408          mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
409          if (use_redc)
410            redc (xp, mp, mn, invm, tp);
411          else
412            mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
413        }
414      else
415        j = l;                          /* case c=0 */
416      while (--j >= 0)
417        {
418          mpn_sqr_n (tp, xp, mn);
419          if (use_redc)
420            redc (xp, mp, mn, invm, tp);
421          else
422            mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
423        }
424    }
425
426  if (use_redc)
427    {
428      /* Convert back xx to xx/R^n.  */
429      MPN_COPY (tp, xp, mn);
430      MPN_ZERO (tp + mn, mn);
431      redc (xp, mp, mn, invm, tp);
432      if (mpn_cmp (xp, mp, mn) >= 0)
433        mpn_sub_n (xp, xp, mp, mn);
434    }
435  else
436    {
437      if (m_zero_cnt != 0)
438        {
439          mp_limb_t cy;
440          cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
441          tp[mn] = cy;
442          mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
443          mpn_rshift (xp, xp, mn, m_zero_cnt);
444        }
445    }
446  xn = mn;
447  MPN_NORMALIZE (xp, xn);
448
449  if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
450    {
451      mp = PTR(m);                    /* want original, unnormalized m */
452      mpn_sub (xp, mp, mn, xp, xn);
453      xn = mn;
454      MPN_NORMALIZE (xp, xn);
455    }
456  MPZ_REALLOC (r, xn);
457  SIZ (r) = xn;
458  MPN_COPY (PTR(r), xp, xn);
459
460  __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
461  TMP_FREE (marker);
462}
Note: See TracBrowser for help on using the repository browser.