1 | /* mpz_n_pow_ui -- mpn raised to ulong. |
---|
2 | |
---|
3 | Copyright 2001, 2002 Free Software Foundation, Inc. |
---|
4 | |
---|
5 | This file is part of the GNU MP Library. |
---|
6 | |
---|
7 | The GNU MP Library is free software; you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU Lesser General Public License as published by |
---|
9 | the Free Software Foundation; either version 2.1 of the License, or (at your |
---|
10 | option) any later version. |
---|
11 | |
---|
12 | The GNU MP Library is distributed in the hope that it will be useful, but |
---|
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
---|
15 | License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Lesser General Public License |
---|
18 | along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
---|
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
---|
20 | MA 02111-1307, USA. */ |
---|
21 | |
---|
22 | #include "gmp.h" |
---|
23 | #include "gmp-impl.h" |
---|
24 | #include "longlong.h" |
---|
25 | |
---|
26 | |
---|
/* Debug tracing, compiled out by default.  Change this to
   "#define TRACE(x) x" to enable the printf/mpn_trace diagnostics
   scattered through this file.  */
#define TRACE(x)
---|
29 | |
---|
30 | |
---|
/* Use this to test the mul_2 code on a CPU without a native version of that
   routine.  Enabling it substitutes the reference implementation and forces
   the HAVE_NATIVE_mpn_mul_2 code paths below to be compiled in.  */
#if 0
#define mpn_mul_2 refmpn_mul_2
#define HAVE_NATIVE_mpn_mul_2 1
#endif
---|
37 | |
---|
38 | |
---|
39 | /* mpz_pow_ui and mpz_ui_pow_ui want to share almost all of this code. |
---|
40 | ui_pow_ui doesn't need the mpn_mul based powering loop or the tests on |
---|
41 | bsize==2 or >2, but separating that isn't easy because there's shared |
---|
42 | code both before and after (the size calculations and the powers of 2 |
---|
43 | handling). |
---|
44 | |
---|
45 | Alternatives: |
---|
46 | |
---|
47 | It would work to just use the mpn_mul powering loop for 1 and 2 limb |
---|
48 | bases, but the current separate loop allows mul_1 and mul_2 to be done |
---|
49 | in-place, which might help cache locality a bit. If mpn_mul was relaxed |
---|
50 | to allow source==dest when vn==1 or 2 then some pointer twiddling might |
---|
51 | let us get the same effect in one loop. |
---|
52 | |
---|
53 | The initial powering for bsize==1 into blimb or blimb:blimb_low doesn't |
---|
54 | form the biggest possible power of b that fits, only the biggest power |
---|
55 | with a 2^n exponent, ie. b^(2^n). It'd be possible to choose a bigger power, perhaps |
---|
56 | using __mp_bases[b].big_base for small b, and thereby get better value |
---|
57 | from mpn_mul_1 or mpn_mul_2 in the bignum powering. It's felt that doing |
---|
58 | so would be more complicated than it's worth, and could well end up being |
---|
59 | a slowdown for small e. For big e on the other hand the algorithm is |
---|
60 | dominated by mpn_sqr_n so there wouldn't be much of a saving. The current |
---|
61 | code can be viewed as simply doing the first few steps of the powering in |
---|
62 | a single or double limb where possible. |
---|
63 | |
---|
64 | If r==b, and blow_twos==0, and r must be realloc'ed, then the temporary |
---|
65 | copy made of b is unnecessary. We could just use the old alloc'ed block |
---|
66 | and free it at the end. But arranging this seems like a lot more trouble |
---|
67 | than it's worth. */ |
---|
68 | |
---|
69 | |
---|
/* floor(sqrt(GMP_NUMB_MAX)), ie. the biggest value that can be squared in
   a limb without overflowing.
   FIXME: This formula is an underestimate when GMP_NUMB_BITS is odd.
   (Underestimating is safe: it only makes the single-limb powering loop
   below fall through to the multi-limb code one step early.)  */

#define GMP_NUMB_HALFMAX  (((mp_limb_t) 1 << GMP_NUMB_BITS/2) - 1)
---|
75 | |
---|
76 | |
---|
/* The following are for convenience, they update the size and check the
   alloc.  */

/* dst = src^2, setting size to the result size (2*size, less one if the
   top limb came out zero).  */
#define MPN_SQR_N(dst, alloc, src, size)        \
  do {                                          \
    ASSERT (2*(size) <= (alloc));               \
    mpn_sqr_n (dst, src, size);                 \
    (size) = 2*(size);                          \
    if ((dst)[(size)-1] == 0)                   \
      (size)--;                                 \
  } while (0)
---|
87 | |
---|
/* dst = src * src2, updating size to the result size.  mpn_mul returns
   the most significant limb; when that limb is zero the product is one
   limb shorter than size+size2.  */
#define MPN_MUL(dst, alloc, src, size, src2, size2)     \
  do {                                                  \
    mp_limb_t  high_;                                   \
    ASSERT ((size) + (size2) <= (alloc));               \
    high_ = mpn_mul (dst, src, size, src2, size2);      \
    (size) += (size2);                                  \
    if (high_ == 0)                                     \
      (size)--;                                         \
  } while (0)
---|
95 | |
---|
/* Multiply {ptr,size} in place by the two-limb {mult,2}, updating size.
   mpn_mul_2 stores size+1 limbs and returns the top limb, which is
   appended when non-zero.  */
#define MPN_MUL_2(ptr, size, alloc, mult)       \
  do {                                          \
    mp_limb_t  carry_;                          \
    ASSERT ((size)+2 <= (alloc));               \
    carry_ = mpn_mul_2 (ptr, ptr, size, mult);  \
    (size)++;                                   \
    (ptr)[(size)] = carry_;                     \
    if (carry_ != 0)                            \
      (size)++;                                 \
  } while (0)
---|
105 | |
---|
/* Multiply {ptr,size} in place by the single limb "limb", updating size.
   The carry limb returned by mpn_mul_1 is appended when non-zero.  */
#define MPN_MUL_1(ptr, size, alloc, limb)       \
  do {                                          \
    mp_limb_t  carry_;                          \
    ASSERT ((size)+1 <= (alloc));               \
    carry_ = mpn_mul_1 (ptr, ptr, size, limb);  \
    (ptr)[size] = carry_;                       \
    if (carry_ != 0)                            \
      (size)++;                                 \
  } while (0)
---|
114 | |
---|
/* Left shift {ptr,size} in place by "shift" bits (0 < shift < limb bits),
   updating size.  Bits shifted out the top are appended when non-zero.  */
#define MPN_LSHIFT(ptr, size, alloc, shift)     \
  do {                                          \
    mp_limb_t  carry_;                          \
    ASSERT ((size)+1 <= (alloc));               \
    carry_ = mpn_lshift (ptr, ptr, size, shift);\
    (ptr)[size] = carry_;                       \
    if (carry_ != 0)                            \
      (size)++;                                 \
  } while (0)
---|
123 | |
---|
/* dst = src >> shift, or a plain copy when shift==0 (mpn_rshift requires
   a non-zero shift count).  After a real shift the top limb may become
   zero, in which case size is reduced.  */
#define MPN_RSHIFT_OR_COPY(dst, src, size, shift)       \
  do {                                                  \
    if ((shift) != 0)                                   \
      {                                                 \
        mpn_rshift (dst, src, size, shift);             \
        if ((dst)[(size)-1] == 0)                       \
          (size)--;                                     \
      }                                                 \
    else                                                \
      MPN_COPY (dst, src, size);                        \
  } while (0)
---|
134 | |
---|
135 | |
---|
/* ralloc and talloc are only wanted for ASSERTs, after the initial space
   allocations.  Avoid writing values to them in a normal build, to ensure
   the compiler lets them go dead.  gcc already figures this out itself
   actually.  */

/* Exchange the roles of the result area (rp) and temporary area (tp).
   Their alloc sizes travel with them, but only in an assertion-checking
   build (see comment above).  */
#define SWAP_RP_TP                                      \
  do {                                                  \
    MP_PTR_SWAP (rp, tp);                               \
    ASSERT_CODE (MP_SIZE_T_SWAP (ralloc, talloc));      \
  } while (0)
---|
146 | |
---|
147 | |
---|
/* Set r = b^e where the base is the mpn {bp,bsize}; bsize<0 indicates a
   negative base.  Shared engine for mpz_pow_ui and mpz_ui_pow_ui.  r may
   be the same mpz as the source of bp (r_bp_overlap below), otherwise the
   operands must be separate per MPN_SAME_OR_SEPARATE2_P.

   Strategy: factors of 2 in b are stripped first and re-applied at the
   end as whole zero limbs (rtwos_limbs) plus a partial-limb shift
   (rtwos_bits).  Small bases are powered up within one limb (rl/blimb)
   before falling into the binary square-and-multiply loops.  */
void
mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
{
  mp_ptr rp;
  mp_size_t rtwos_limbs, ralloc, rsize;
  int rneg, i, cnt, btwos, r_bp_overlap;
  mp_limb_t blimb, rl;
  unsigned long rtwos_bits;
#if HAVE_NATIVE_mpn_mul_2
  mp_limb_t blimb_low, rl_high;
#else
  mp_limb_t b_twolimbs[2];
#endif
  TMP_DECL (marker);

  TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n",
                 PTR(r), bp, bsize, e, e);
         mpn_trace ("b", bp, bsize));

  ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);
  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ABSIZ(r), bp, bsize));

  /* b^0 == 1, including 0^0 == 1 */
  if (e == 0)
    {
      PTR(r)[0] = 1;
      SIZ(r) = 1;
      return;
    }

  /* 0^e == 0 apart from 0^0 above */
  if (bsize == 0)
    {
      SIZ(r) = 0;
      return;
    }

  /* Sign of the final result: negative only for a negative base raised to
     an odd power. */
  rneg = (bsize < 0 && (e & 1) != 0);
  bsize = ABS (bsize);
  TRACE (printf ("rneg %d\n", rneg));

  r_bp_overlap = (PTR(r) == bp);

  /* Strip low zero limbs from b.  Each stripped limb contributes e whole
     zero limbs to the result. */
  rtwos_limbs = 0;
  for (blimb = *bp; blimb == 0; blimb = *++bp)
    {
      rtwos_limbs += e;
      bsize--; ASSERT (bsize >= 1);
    }
  TRACE (printf ("trailing zero rtwos_limbs=%ld\n", rtwos_limbs));

  /* Strip low zero bits from b.  e*btwos result bits of zeros are split
     into whole limbs (rtwos_limbs) and a final sub-limb shift
     (rtwos_bits). */
  count_trailing_zeros (btwos, blimb);
  blimb >>= btwos;
  rtwos_bits = e * btwos;
  rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;
  rtwos_bits %= GMP_NUMB_BITS;
  TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n",
                 btwos, rtwos_limbs, rtwos_bits));

  TMP_MARK (marker);

  /* rl (and rl_high with native mul_2) accumulate the part of b^e absorbed
     while powering within a limb; it is applied by a final mul_1/mul_2. */
  rl = 1;
#if HAVE_NATIVE_mpn_mul_2
  rl_high = 0;
#endif

  if (bsize == 1)
    {
    bsize_1:
      /* Power up as far as possible within blimb.  We start here with e!=0,
         but if e is small then we might reach e==0 and the whole b^e in rl.
         Notice this code works when blimb==1 too, reaching e==0.  */

      while (blimb <= GMP_NUMB_HALFMAX)
        {
          TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
                         e, blimb, rl));
          ASSERT (e != 0);
          if ((e & 1) != 0)
            rl *= blimb;
          e >>= 1;
          if (e == 0)
            goto got_rl;
          blimb *= blimb;
        }

#if HAVE_NATIVE_mpn_mul_2
      TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n",
                     e, blimb, rl));

      /* Can power b once more into blimb:blimb_low.  The shifts by
         GMP_NAIL_BITS keep umul_ppmm's full-limb product within nail
         limits (no-ops when nails are off). */
      bsize = 2;
      ASSERT (e != 0);
      if ((e & 1) != 0)
        {
          umul_ppmm (rl_high, rl, rl, blimb << GMP_NAIL_BITS);
          rl >>= GMP_NAIL_BITS;
        }
      e >>= 1;
      umul_ppmm (blimb, blimb_low, blimb, blimb << GMP_NAIL_BITS);
      blimb_low >>= GMP_NAIL_BITS;

    got_rl:
      TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n",
                     e, blimb, blimb_low, rl_high, rl));

      /* Combine left-over rtwos_bits into rl_high:rl to be handled by the
         final mul_1 or mul_2 rather than a separate lshift.
         - rl_high:rl mustn't be 1 (since then there's no final mul)
         - rl_high mustn't overflow
         - rl_high mustn't change to non-zero, since mul_1+lshift is
           probably faster than mul_2 (FIXME: is this true?)  */

      if (rtwos_bits != 0
          && ! (rl_high == 0 && rl == 1)
          && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
        {
          mp_limb_t new_rl_high = (rl_high << rtwos_bits)
            | (rl >> (GMP_NUMB_BITS-rtwos_bits));
          if (! (rl_high == 0 && new_rl_high != 0))
            {
              rl_high = new_rl_high;
              rl <<= rtwos_bits;
              rtwos_bits = 0;
              TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
                             rl_high, rl));
            }
        }
#else
    got_rl:
      TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n",
                     e, blimb, rl));

      /* Combine left-over rtwos_bits into rl to be handled by the final
         mul_1 rather than a separate lshift.
         - rl mustn't be 1 (since then there's no final mul)
         - rl mustn't overflow  */

      if (rtwos_bits != 0
          && rl != 1
          && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
        {
          rl <<= rtwos_bits;
          rtwos_bits = 0;
          TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
        }
#endif
    }
  else if (bsize == 2)
    {
      /* Apply the btwos right shift across the two limbs of b. */
      mp_limb_t bsecond = bp[1];
      if (btwos != 0)
        blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
      bsecond >>= btwos;
      if (bsecond == 0)
        {
          /* Two limbs became one after rshift. */
          bsize = 1;
          goto bsize_1;
        }

      TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb));
#if HAVE_NATIVE_mpn_mul_2
      blimb_low = blimb;
#else
      /* Without native mul_2 the two-limb base goes through the general
         mpn_mul loop; point bp at a local copy of the shifted limbs. */
      bp = b_twolimbs;
      b_twolimbs[0] = blimb;
      b_twolimbs[1] = bsecond;
#endif
      blimb = bsecond;
    }
  else
    {
      /* bsize >= 3.  A temporary copy of b is needed if it must be
         shifted, or if r==b (since r is realloc'ed and written below). */
      if (r_bp_overlap || btwos != 0)
        {
          mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
          MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
          bp = tp;
          TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
        }
#if HAVE_NATIVE_mpn_mul_2
      /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */
      blimb_low = bp[0];
#endif
      blimb = bp[bsize-1];

      TRACE (printf ("big bsize=%ld ", bsize);
             mpn_trace ("b", bp, bsize));
    }

  /* At this point blimb is the most significant limb of the base to use.

     Each factor of b takes (bsize*BPML-cnt) bits and there's e of them; +1
     limb to round up the division; +1 for multiplies all using an extra
     limb over the true size; +2 for rl at the end; +1 for lshift at the
     end.

     The size calculation here is reasonably accurate.  The base is at least
     half a limb, so in 32 bits the worst case is 2^16+1 treated as 17 bits
     when it will power up as just over 16, an overestimate of 17/16 =
     6.25%.  For a 64-bit limb it's half that.

     If e==0 then blimb won't be anything useful (though it will be
     non-zero), but that doesn't matter since we just end up with ralloc==5,
     and that's fine for 2 limbs of rl and 1 of lshift. */

  ASSERT (blimb != 0);
  count_leading_zeros (cnt, blimb);
  ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;
  TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n",
                 ralloc, bsize, blimb, cnt));
  MPZ_REALLOC (r, ralloc + rtwos_limbs);
  rp = PTR(r);

  /* Low zero limbs resulting from powers of 2. */
  MPN_ZERO (rp, rtwos_limbs);
  rp += rtwos_limbs;

  if (e == 0)
    {
      /* Any e==0 other than via bsize==1 or bsize==2 is covered at the
         start:  here the whole b^e already sits in rl (and rl_high). */
      rp[0] = rl;
      rsize = 1;
#if HAVE_NATIVE_mpn_mul_2
      rp[1] = rl_high;
      rsize += (rl_high != 0);
#endif
      ASSERT (rp[rsize-1] != 0);
    }
  else
    {
      mp_ptr tp;
      mp_size_t talloc;

      /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the
         low bit of e is zero, tp only has to hold the second last power
         step, which is half the size of the final result.  There's no need
         to round up the divide by 2, since ralloc includes a +2 for rl
         which is not needed by tp.  In the mpn_mul loop when the low bit of
         e is 1, tp must hold nearly the full result, so just size it the
         same as rp. */

      talloc = ralloc;
#if HAVE_NATIVE_mpn_mul_2
      if (bsize <= 2 || (e & 1) == 0)
        talloc /= 2;
#else
      if (bsize <= 1 || (e & 1) == 0)
        talloc /= 2;
#endif
      TRACE (printf ("talloc %ld\n", talloc));
      tp = TMP_ALLOC_LIMBS (talloc);

      /* Go from high to low over the bits of e, starting with i pointing at
         the bit below the highest 1 (which will mean i==-1 if e==1). */
      count_leading_zeros (cnt, e);
      i = GMP_LIMB_BITS - cnt - 2;

#if HAVE_NATIVE_mpn_mul_2
      if (bsize <= 2)
        {
          mp_limb_t mult[2];

          /* Any bsize==1 will have been powered above to be two limbs. */
          ASSERT (bsize == 2);
          ASSERT (blimb != 0);

          /* Arrange the final result ends up in r, not in the temp space.
             Each MPN_SQR_N+SWAP_RP_TP pair flips rp/tp once, so pre-swap
             when the remaining iteration count is odd. */
          if ((i & 1) == 0)
            SWAP_RP_TP;

          rp[0] = blimb_low;
          rp[1] = blimb;
          rsize = 2;

          mult[0] = blimb_low;
          mult[1] = blimb;

          for ( ; i >= 0; i--)
            {
              TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
                             i, e, rsize, ralloc, talloc);
                     mpn_trace ("r", rp, rsize));

              MPN_SQR_N (tp, talloc, rp, rsize);
              SWAP_RP_TP;
              if ((e & (1L << i)) != 0)
                MPN_MUL_2 (rp, rsize, ralloc, mult);
            }

          TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
          if (rl_high != 0)
            {
              mult[0] = rl;
              mult[1] = rl_high;
              MPN_MUL_2 (rp, rsize, ralloc, mult);
            }
          else if (rl != 1)
            MPN_MUL_1 (rp, rsize, ralloc, rl);
        }
#else
      if (bsize == 1)
        {
          /* Arrange the final result ends up in r, not in the temp space.
             Each MPN_SQR_N+SWAP_RP_TP pair flips rp/tp once, so pre-swap
             when the remaining iteration count is odd. */
          if ((i & 1) == 0)
            SWAP_RP_TP;

          rp[0] = blimb;
          rsize = 1;

          for ( ; i >= 0; i--)
            {
              TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
                             i, e, rsize, ralloc, talloc);
                     mpn_trace ("r", rp, rsize));

              MPN_SQR_N (tp, talloc, rp, rsize);
              SWAP_RP_TP;
              if ((e & (1L << i)) != 0)
                MPN_MUL_1 (rp, rsize, ralloc, blimb);
            }

          TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
          if (rl != 1)
            MPN_MUL_1 (rp, rsize, ralloc, rl);
        }
#endif
      else
        {
          int parity;

          /* Arrange the final result ends up in r, not in the temp space.
             Here both the squaring and the conditional mpn_mul swap rp/tp,
             so the total flip count is (i+1) plus the number of 1 bits of
             e below the top bit; pre-swap when that total is odd. */
          ULONG_PARITY (parity, e);
          if (((parity ^ i) & 1) != 0)
            SWAP_RP_TP;

          MPN_COPY (rp, bp, bsize);
          rsize = bsize;

          for ( ; i >= 0; i--)
            {
              TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
                             i, e, rsize, ralloc, talloc);
                     mpn_trace ("r", rp, rsize));

              MPN_SQR_N (tp, talloc, rp, rsize);
              SWAP_RP_TP;
              if ((e & (1L << i)) != 0)
                {
                  MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
                  SWAP_RP_TP;
                }
            }
        }
    }

  ASSERT (rp == PTR(r) + rtwos_limbs);
  TRACE (mpn_trace ("end loop r", rp, rsize));
  TMP_FREE (marker);

  /* Apply any partial limb factors of 2. */
  if (rtwos_bits != 0)
    {
      MPN_LSHIFT (rp, rsize, ralloc, (unsigned) rtwos_bits);
      TRACE (mpn_trace ("lshift r", rp, rsize));
    }

  rsize += rtwos_limbs;
  SIZ(r) = (rneg ? -rsize : rsize);
}
---|