/* source: trunk/third/gmp/tests/refmpn.c, revision 22254 (checked in by ghudson) */

/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.

Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

23 | |

24 | |

/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside these ranges (eg. for size==0), but the assertions are present to
   pick up bad parameters passed here that are about to be passed the same
   to a real mpn routine being compared. */

30 | |

31 | /* always do assertion checking */ |

32 | #define WANT_ASSERT 1 |

33 | |

34 | #include <stdio.h> /* for NULL */ |

35 | #include <stdlib.h> /* for malloc */ |

36 | |

37 | #include "gmp.h" |

38 | #include "gmp-impl.h" |

39 | #include "longlong.h" |

40 | |

41 | #include "tests.h" |

42 | |

43 | |

44 | |

45 | /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes |

46 | in bytes. */ |

47 | int |

48 | byte_overlap_p (const void *v_xp, mp_size_t xsize, |

49 | const void *v_yp, mp_size_t ysize) |

50 | { |

51 | const char *xp = v_xp; |

52 | const char *yp = v_yp; |

53 | |

54 | ASSERT (xsize >= 0); |

55 | ASSERT (ysize >= 0); |

56 | |

57 | /* no wraparounds */ |

58 | ASSERT (xp+xsize >= xp); |

59 | ASSERT (yp+ysize >= yp); |

60 | |

61 | if (xp + xsize <= yp) |

62 | return 0; |

63 | |

64 | if (yp + ysize <= xp) |

65 | return 0; |

66 | |

67 | return 1; |

68 | } |

69 | |

70 | /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */ |

71 | int |

72 | refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) |

73 | { |

74 | return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB, |

75 | yp, ysize * BYTES_PER_MP_LIMB); |

76 | } |

77 | |

78 | /* Check overlap for a routine defined to work low to high. */ |

79 | int |

80 | refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) |

81 | { |

82 | return (dst <= src || ! refmpn_overlap_p (dst, size, src, size)); |

83 | } |

84 | |

85 | /* Check overlap for a routine defined to work high to low. */ |

86 | int |

87 | refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) |

88 | { |

89 | return (dst >= src || ! refmpn_overlap_p (dst, size, src, size)); |

90 | } |

91 | |

92 | /* Check overlap for a standard routine requiring equal or separate. */ |

93 | int |

94 | refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size) |

95 | { |

96 | return (dst == src || ! refmpn_overlap_p (dst, size, src, size)); |

97 | } |

98 | int |

99 | refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2, |

100 | mp_size_t size) |

101 | { |

102 | return (refmpn_overlap_fullonly_p (dst, src1, size) |

103 | && refmpn_overlap_fullonly_p (dst, src2, size)); |

104 | } |

105 | |

106 | |

107 | mp_ptr |

108 | refmpn_malloc_limbs (mp_size_t size) |

109 | { |

110 | mp_ptr p; |

111 | ASSERT (size >= 0); |

112 | if (size == 0) |

113 | size = 1; |

114 | p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB); |

115 | ASSERT (p != NULL); |

116 | return p; |

117 | } |

118 | |

119 | mp_ptr |

120 | refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size) |

121 | { |

122 | mp_ptr p; |

123 | p = refmpn_malloc_limbs (size); |

124 | refmpn_copyi (p, ptr, size); |

125 | return p; |

126 | } |

127 | |

128 | /* malloc n limbs on a multiple of m bytes boundary */ |

129 | mp_ptr |

130 | refmpn_malloc_limbs_aligned (size_t n, size_t m) |

131 | { |

132 | return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m); |

133 | } |

134 | |

135 | |

136 | void |

137 | refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value) |

138 | { |

139 | mp_size_t i; |

140 | ASSERT (size >= 0); |

141 | for (i = 0; i < size; i++) |

142 | ptr[i] = value; |

143 | } |

144 | |

145 | void |

146 | refmpn_zero (mp_ptr ptr, mp_size_t size) |

147 | { |

148 | refmpn_fill (ptr, size, CNST_LIMB(0)); |

149 | } |

150 | |

151 | int |

152 | refmpn_zero_p (mp_srcptr ptr, mp_size_t size) |

153 | { |

154 | mp_size_t i; |

155 | for (i = 0; i < size; i++) |

156 | if (ptr[i] != 0) |

157 | return 0; |

158 | return 1; |

159 | } |

160 | |

161 | /* the highest one bit in x */ |

162 | mp_limb_t |

163 | refmpn_msbone (mp_limb_t x) |

164 | { |

165 | mp_limb_t n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1); |

166 | |

167 | while (n != 0) |

168 | { |

169 | if (x & n) |

170 | break; |

171 | n >>= 1; |

172 | } |

173 | return n; |

174 | } |

175 | |

176 | /* a mask of the highest one bit plus and all bits below */ |

177 | mp_limb_t |

178 | refmpn_msbone_mask (mp_limb_t x) |

179 | { |

180 | if (x == 0) |

181 | return 0; |

182 | |

183 | return (refmpn_msbone (x) << 1) - 1; |

184 | } |

185 | |

186 | |

187 | void |

188 | refmpn_setbit (mp_ptr ptr, unsigned long bit) |

189 | { |

190 | ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS); |

191 | } |

192 | |

193 | void |

194 | refmpn_clrbit (mp_ptr ptr, unsigned long bit) |

195 | { |

196 | ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS)); |

197 | } |

198 | |

199 | #define REFMPN_TSTBIT(ptr,bit) \ |

200 | (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0) |

201 | |

202 | int |

203 | refmpn_tstbit (mp_srcptr ptr, unsigned long bit) |

204 | { |

205 | return REFMPN_TSTBIT (ptr, bit); |

206 | } |

207 | |

208 | unsigned long |

209 | refmpn_scan0 (mp_srcptr ptr, unsigned long bit) |

210 | { |

211 | while (REFMPN_TSTBIT (ptr, bit) != 0) |

212 | bit++; |

213 | return bit; |

214 | } |

215 | |

216 | unsigned long |

217 | refmpn_scan1 (mp_srcptr ptr, unsigned long bit) |

218 | { |

219 | while (REFMPN_TSTBIT (ptr, bit) == 0) |

220 | bit++; |

221 | return bit; |

222 | } |

223 | |

224 | void |

225 | refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size) |

226 | { |

227 | ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); |

228 | refmpn_copyi (rp, sp, size); |

229 | } |

230 | |

231 | void |

232 | refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size) |

233 | { |

234 | mp_size_t i; |

235 | |

236 | ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); |

237 | ASSERT (size >= 0); |

238 | |

239 | for (i = 0; i < size; i++) |

240 | rp[i] = sp[i]; |

241 | } |

242 | |

243 | void |

244 | refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size) |

245 | { |

246 | mp_size_t i; |

247 | |

248 | ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); |

249 | ASSERT (size >= 0); |

250 | |

251 | for (i = size-1; i >= 0; i--) |

252 | rp[i] = sp[i]; |

253 | } |

254 | |

255 | void |

256 | refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size) |

257 | { |

258 | mp_size_t i; |

259 | |

260 | ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); |

261 | ASSERT (size >= 1); |

262 | ASSERT_MPN (sp, size); |

263 | |

264 | for (i = 0; i < size; i++) |

265 | rp[i] = sp[i] ^ GMP_NUMB_MASK; |

266 | } |

267 | |

268 | |

269 | int |

270 | refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) |

271 | { |

272 | mp_size_t i; |

273 | |

274 | ASSERT (size >= 1); |

275 | ASSERT_MPN (xp, size); |

276 | ASSERT_MPN (yp, size); |

277 | |

278 | for (i = size-1; i >= 0; i--) |

279 | { |

280 | if (xp[i] > yp[i]) return 1; |

281 | if (xp[i] < yp[i]) return -1; |

282 | } |

283 | return 0; |

284 | } |

285 | |

286 | int |

287 | refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size) |

288 | { |

289 | if (size == 0) |

290 | return 0; |

291 | else |

292 | return refmpn_cmp (xp, yp, size); |

293 | } |

294 | |

295 | int |

296 | refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize, |

297 | mp_srcptr yp, mp_size_t ysize) |

298 | { |

299 | int opp, cmp; |

300 | |

301 | ASSERT_MPN (xp, xsize); |

302 | ASSERT_MPN (yp, ysize); |

303 | |

304 | opp = (xsize < ysize); |

305 | if (opp) |

306 | MPN_SRCPTR_SWAP (xp,xsize, yp,ysize); |

307 | |

308 | if (! refmpn_zero_p (xp+ysize, xsize-ysize)) |

309 | cmp = 1; |

310 | else |

311 | cmp = refmpn_cmp (xp, yp, ysize); |

312 | |

313 | return (opp ? -cmp : cmp); |

314 | } |

315 | |

316 | int |

317 | refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size) |

318 | { |

319 | mp_size_t i; |

320 | ASSERT (size >= 0); |

321 | |

322 | for (i = 0; i < size; i++) |

323 | if (xp[i] != yp[i]) |

324 | return 0; |

325 | return 1; |

326 | } |

327 | |

328 | |

329 | #define LOGOPS(operation) \ |

330 | { \ |

331 | mp_size_t i; \ |

332 | \ |

333 | ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ |

334 | ASSERT (size >= 1); \ |

335 | ASSERT_MPN (s1p, size); \ |

336 | ASSERT_MPN (s2p, size); \ |

337 | \ |

338 | for (i = 0; i < size; i++) \ |

339 | rp[i] = operation; \ |

340 | } |

341 | |

342 | void |

343 | refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

344 | { |

345 | LOGOPS (s1p[i] & s2p[i]); |

346 | } |

347 | void |

348 | refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

349 | { |

350 | LOGOPS (s1p[i] & ~s2p[i]); |

351 | } |

352 | void |

353 | refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

354 | { |

355 | LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK); |

356 | } |

357 | void |

358 | refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

359 | { |

360 | LOGOPS (s1p[i] | s2p[i]); |

361 | } |

362 | void |

363 | refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

364 | { |

365 | LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK)); |

366 | } |

367 | void |

368 | refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

369 | { |

370 | LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK); |

371 | } |

372 | void |

373 | refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

374 | { |

375 | LOGOPS (s1p[i] ^ s2p[i]); |

376 | } |

377 | void |

378 | refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

379 | { |

380 | LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK); |

381 | } |

382 | |

383 | /* set *w to x+y, return 0 or 1 carry */ |

384 | mp_limb_t |

385 | add (mp_limb_t *w, mp_limb_t x, mp_limb_t y) |

386 | { |

387 | mp_limb_t sum, cy; |

388 | |

389 | ASSERT_LIMB (x); |

390 | ASSERT_LIMB (y); |

391 | |

392 | sum = x + y; |

393 | #if GMP_NAIL_BITS == 0 |

394 | *w = sum; |

395 | cy = (sum < x); |

396 | #else |

397 | *w = sum & GMP_NUMB_MASK; |

398 | cy = (sum >> GMP_NUMB_BITS); |

399 | #endif |

400 | return cy; |

401 | } |

402 | |

403 | /* set *w to x-y, return 0 or 1 borrow */ |

404 | mp_limb_t |

405 | sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y) |

406 | { |

407 | mp_limb_t diff, cy; |

408 | |

409 | ASSERT_LIMB (x); |

410 | ASSERT_LIMB (y); |

411 | |

412 | diff = x - y; |

413 | #if GMP_NAIL_BITS == 0 |

414 | *w = diff; |

415 | cy = (diff > x); |

416 | #else |

417 | *w = diff & GMP_NUMB_MASK; |

418 | cy = (diff >> GMP_NUMB_BITS) & 1; |

419 | #endif |

420 | return cy; |

421 | } |

422 | |

423 | /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */ |

424 | mp_limb_t |

425 | adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) |

426 | { |

427 | mp_limb_t r; |

428 | |

429 | ASSERT_LIMB (x); |

430 | ASSERT_LIMB (y); |

431 | ASSERT (c == 0 || c == 1); |

432 | |

433 | r = add (w, x, y); |

434 | return r + add (w, *w, c); |

435 | } |

436 | |

437 | /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */ |

438 | mp_limb_t |

439 | sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c) |

440 | { |

441 | mp_limb_t r; |

442 | |

443 | ASSERT_LIMB (x); |

444 | ASSERT_LIMB (y); |

445 | ASSERT (c == 0 || c == 1); |

446 | |

447 | r = sub (w, x, y); |

448 | return r + sub (w, *w, c); |

449 | } |

450 | |

451 | |

452 | #define AORS_1(operation) \ |

453 | { \ |

454 | mp_limb_t i; \ |

455 | \ |

456 | ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ |

457 | ASSERT (size >= 1); \ |

458 | ASSERT_MPN (sp, size); \ |

459 | ASSERT_LIMB (n); \ |

460 | \ |

461 | for (i = 0; i < size; i++) \ |

462 | n = operation (&rp[i], sp[i], n); \ |

463 | return n; \ |

464 | } |

465 | |

466 | mp_limb_t |

467 | refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) |

468 | { |

469 | AORS_1 (add); |

470 | } |

471 | mp_limb_t |

472 | refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n) |

473 | { |

474 | AORS_1 (sub); |

475 | } |

476 | |

477 | #define AORS_NC(operation) \ |

478 | { \ |

479 | mp_size_t i; \ |

480 | \ |

481 | ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \ |

482 | ASSERT (carry == 0 || carry == 1); \ |

483 | ASSERT (size >= 1); \ |

484 | ASSERT_MPN (s1p, size); \ |

485 | ASSERT_MPN (s2p, size); \ |

486 | \ |

487 | for (i = 0; i < size; i++) \ |

488 | carry = operation (&rp[i], s1p[i], s2p[i], carry); \ |

489 | return carry; \ |

490 | } |

491 | |

492 | mp_limb_t |

493 | refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, |

494 | mp_limb_t carry) |

495 | { |

496 | AORS_NC (adc); |

497 | } |

498 | mp_limb_t |

499 | refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, |

500 | mp_limb_t carry) |

501 | { |

502 | AORS_NC (sbb); |

503 | } |

504 | |

505 | |

506 | mp_limb_t |

507 | refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

508 | { |

509 | return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0)); |

510 | } |

511 | mp_limb_t |

512 | refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

513 | { |

514 | return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0)); |

515 | } |

516 | |

517 | |

518 | /* Twos complement, return borrow. */ |

519 | mp_limb_t |

520 | refmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size) |

521 | { |

522 | mp_ptr zeros; |

523 | mp_limb_t ret; |

524 | |

525 | ASSERT (size >= 1); |

526 | |

527 | zeros = refmpn_malloc_limbs (size); |

528 | refmpn_fill (zeros, size, CNST_LIMB(0)); |

529 | ret = refmpn_sub_n (dst, zeros, src, size); |

530 | free (zeros); |

531 | return ret; |

532 | } |

533 | |

534 | |

535 | #define AORS(aors_n, aors_1) \ |

536 | { \ |

537 | mp_limb_t c; \ |

538 | ASSERT (s1size >= s2size); \ |

539 | ASSERT (s2size >= 1); \ |

540 | c = aors_n (rp, s1p, s2p, s2size); \ |

541 | if (s1size-s2size != 0) \ |

542 | c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \ |

543 | return c; \ |

544 | } |

545 | mp_limb_t |

546 | refmpn_add (mp_ptr rp, |

547 | mp_srcptr s1p, mp_size_t s1size, |

548 | mp_srcptr s2p, mp_size_t s2size) |

549 | { |

550 | AORS (refmpn_add_n, refmpn_add_1); |

551 | } |

552 | mp_limb_t |

553 | refmpn_sub (mp_ptr rp, |

554 | mp_srcptr s1p, mp_size_t s1size, |

555 | mp_srcptr s2p, mp_size_t s2size) |

556 | { |

557 | AORS (refmpn_sub_n, refmpn_sub_1); |

558 | } |

559 | |

560 | |

561 | #define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2) |

562 | #define SHIFTLOW(x) ((x) >> BITS_PER_MP_LIMB/2) |

563 | |

564 | #define LOWMASK (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1) |

565 | #define HIGHMASK SHIFTHIGH(LOWMASK) |

566 | |

567 | #define LOWPART(x) ((x) & LOWMASK) |

568 | #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK) |

569 | |

570 | /* Set *hi,*lo to x*y, using full limbs not nails. */ |

571 | mp_limb_t |

572 | refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y) |

573 | { |

574 | mp_limb_t hi, s; |

575 | |

576 | *lo = LOWPART(x) * LOWPART(y); |

577 | hi = HIGHPART(x) * HIGHPART(y); |

578 | |

579 | s = LOWPART(x) * HIGHPART(y); |

580 | hi += HIGHPART(s); |

581 | s = SHIFTHIGH(LOWPART(s)); |

582 | *lo += s; |

583 | hi += (*lo < s); |

584 | |

585 | s = HIGHPART(x) * LOWPART(y); |

586 | hi += HIGHPART(s); |

587 | s = SHIFTHIGH(LOWPART(s)); |

588 | *lo += s; |

589 | hi += (*lo < s); |

590 | |

591 | return hi; |

592 | } |

593 | |

594 | mp_limb_t |

595 | refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo) |

596 | { |

597 | return refmpn_umul_ppmm (lo, x, y); |

598 | } |

599 | |

600 | mp_limb_t |

601 | refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier, |

602 | mp_limb_t carry) |

603 | { |

604 | mp_size_t i; |

605 | mp_limb_t hi, lo; |

606 | |

607 | ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); |

608 | ASSERT (size >= 1); |

609 | ASSERT_MPN (sp, size); |

610 | ASSERT_LIMB (multiplier); |

611 | ASSERT_LIMB (carry); |

612 | |

613 | multiplier <<= GMP_NAIL_BITS; |

614 | for (i = 0; i < size; i++) |

615 | { |

616 | hi = refmpn_umul_ppmm (&lo, sp[i], multiplier); |

617 | lo >>= GMP_NAIL_BITS; |

618 | ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry))); |

619 | rp[i] = lo; |

620 | carry = hi; |

621 | } |

622 | return carry; |

623 | } |

624 | |

625 | mp_limb_t |

626 | refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) |

627 | { |

628 | return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); |

629 | } |

630 | |

631 | |

632 | mp_limb_t |

633 | refmpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult) |

634 | { |

635 | mp_ptr src_copy; |

636 | mp_limb_t c; |

637 | |

638 | ASSERT (refmpn_overlap_fullonly_p (dst, src, size)); |

639 | ASSERT (! refmpn_overlap_p (dst, size+1, mult, 2)); |

640 | ASSERT (size >= 1); |

641 | ASSERT_MPN (mult, 2); |

642 | |

643 | /* in case dst==src */ |

644 | src_copy = refmpn_malloc_limbs (size); |

645 | refmpn_copyi (src_copy, src, size); |

646 | src = src_copy; |

647 | |

648 | dst[size] = refmpn_mul_1 (dst, src, size, mult[0]); |

649 | c = refmpn_addmul_1 (dst+1, src, size, mult[1]); |

650 | free (src_copy); |

651 | return c; |

652 | } |

653 | |

654 | |

655 | #define AORSMUL_1C(operation_n) \ |

656 | { \ |

657 | mp_ptr p; \ |

658 | mp_limb_t ret; \ |

659 | \ |

660 | ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \ |

661 | \ |

662 | p = refmpn_malloc_limbs (size); \ |

663 | ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \ |

664 | ret += operation_n (rp, rp, p, size); \ |

665 | \ |

666 | free (p); \ |

667 | return ret; \ |

668 | } |

669 | |

670 | mp_limb_t |

671 | refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, |

672 | mp_limb_t multiplier, mp_limb_t carry) |

673 | { |

674 | AORSMUL_1C (refmpn_add_n); |

675 | } |

676 | mp_limb_t |

677 | refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, |

678 | mp_limb_t multiplier, mp_limb_t carry) |

679 | { |

680 | AORSMUL_1C (refmpn_sub_n); |

681 | } |

682 | |

683 | |

684 | mp_limb_t |

685 | refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) |

686 | { |

687 | return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); |

688 | } |

689 | mp_limb_t |

690 | refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier) |

691 | { |

692 | return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0)); |

693 | } |

694 | |

695 | |

696 | mp_limb_t |

697 | refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p, |

698 | mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, |

699 | mp_limb_t carry) |

700 | { |

701 | mp_ptr p; |

702 | mp_limb_t acy, scy; |

703 | |

704 | /* Destinations can't overlap. */ |

705 | ASSERT (! refmpn_overlap_p (r1p, size, r2p, size)); |

706 | ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size)); |

707 | ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size)); |

708 | ASSERT (size >= 1); |

709 | |

710 | /* in case r1p==s1p or r1p==s2p */ |

711 | p = refmpn_malloc_limbs (size); |

712 | |

713 | acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1); |

714 | scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1); |

715 | refmpn_copyi (r1p, p, size); |

716 | |

717 | free (p); |

718 | return 2 * acy + scy; |

719 | } |

720 | |

721 | mp_limb_t |

722 | refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p, |

723 | mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

724 | { |

725 | return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0)); |

726 | } |

727 | |

728 | |

729 | /* Right shift hi,lo and return the low limb of the result. |

730 | Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */ |

731 | mp_limb_t |

732 | rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) |

733 | { |

734 | ASSERT (shift < GMP_NUMB_BITS); |

735 | if (shift == 0) |

736 | return lo; |

737 | else |

738 | return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK; |

739 | } |

740 | |

741 | /* Left shift hi,lo and return the high limb of the result. |

742 | Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */ |

743 | mp_limb_t |

744 | lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift) |

745 | { |

746 | ASSERT (shift < GMP_NUMB_BITS); |

747 | if (shift == 0) |

748 | return hi; |

749 | else |

750 | return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK; |

751 | } |

752 | |

753 | |

754 | mp_limb_t |

755 | refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) |

756 | { |

757 | mp_limb_t ret; |

758 | mp_size_t i; |

759 | |

760 | ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size)); |

761 | ASSERT (size >= 1); |

762 | ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); |

763 | ASSERT_MPN (sp, size); |

764 | |

765 | ret = rshift_make (sp[0], CNST_LIMB(0), shift); |

766 | |

767 | for (i = 0; i < size-1; i++) |

768 | rp[i] = rshift_make (sp[i+1], sp[i], shift); |

769 | |

770 | rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift); |

771 | return ret; |

772 | } |

773 | |

774 | mp_limb_t |

775 | refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) |

776 | { |

777 | mp_limb_t ret; |

778 | mp_size_t i; |

779 | |

780 | ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size)); |

781 | ASSERT (size >= 1); |

782 | ASSERT (shift >= 1 && shift < GMP_NUMB_BITS); |

783 | ASSERT_MPN (sp, size); |

784 | |

785 | ret = lshift_make (CNST_LIMB(0), sp[size-1], shift); |

786 | |

787 | for (i = size-2; i >= 0; i--) |

788 | rp[i+1] = lshift_make (sp[i+1], sp[i], shift); |

789 | |

790 | rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift); |

791 | return ret; |

792 | } |

793 | |

794 | |

795 | mp_limb_t |

796 | refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) |

797 | { |

798 | if (shift == 0) |

799 | { |

800 | refmpn_copyi (rp, sp, size); |

801 | return 0; |

802 | } |

803 | else |

804 | { |

805 | return refmpn_rshift (rp, sp, size, shift); |

806 | } |

807 | } |

808 | |

809 | mp_limb_t |

810 | refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift) |

811 | { |

812 | if (shift == 0) |

813 | { |

814 | refmpn_copyd (rp, sp, size); |

815 | return 0; |

816 | } |

817 | else |

818 | { |

819 | return refmpn_lshift (rp, sp, size, shift); |

820 | } |

821 | } |

822 | |

823 | |

824 | /* Divide h,l by d, return quotient, store remainder to *rp. |

825 | Operates on full limbs, not nails. |

826 | Must have h < d. |

827 | __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */ |

828 | mp_limb_t |

829 | refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d) |

830 | { |

831 | mp_limb_t q, r; |

832 | int n; |

833 | |

834 | ASSERT (d != 0); |

835 | ASSERT (h < d); |

836 | |

837 | #if 0 |

838 | udiv_qrnnd (q, r, h, l, d); |

839 | *rp = r; |

840 | return q; |

841 | #endif |

842 | |

843 | n = refmpn_count_leading_zeros (d); |

844 | d <<= n; |

845 | |

846 | if (n != 0) |

847 | { |

848 | h = (h << n) | (l >> (GMP_LIMB_BITS - n)); |

849 | l <<= n; |

850 | } |

851 | |

852 | __udiv_qrnnd_c (q, r, h, l, d); |

853 | r >>= n; |

854 | *rp = r; |

855 | return q; |

856 | } |

857 | |

858 | mp_limb_t |

859 | refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp) |

860 | { |

861 | return refmpn_udiv_qrnnd (rp, h, l, d); |

862 | } |

863 | |

864 | /* This little subroutine avoids some bad code generation from i386 gcc 3.0 |

865 | -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */ |

866 | static mp_limb_t |

867 | refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size, |

868 | mp_limb_t divisor, mp_limb_t carry) |

869 | { |

870 | mp_size_t i; |

871 | for (i = size-1; i >= 0; i--) |

872 | { |

873 | rp[i] = refmpn_udiv_qrnnd (&carry, carry, |

874 | sp[i] << GMP_NAIL_BITS, |

875 | divisor << GMP_NAIL_BITS); |

876 | carry >>= GMP_NAIL_BITS; |

877 | } |

878 | return carry; |

879 | } |

880 | |

881 | mp_limb_t |

882 | refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, |

883 | mp_limb_t divisor, mp_limb_t carry) |

884 | { |

885 | mp_ptr sp_orig; |

886 | mp_ptr prod; |

887 | mp_limb_t carry_orig; |

888 | |

889 | ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); |

890 | ASSERT (size >= 0); |

891 | ASSERT (carry < divisor); |

892 | ASSERT_MPN (sp, size); |

893 | ASSERT_LIMB (divisor); |

894 | ASSERT_LIMB (carry); |

895 | |

896 | if (size == 0) |

897 | return carry; |

898 | |

899 | sp_orig = refmpn_memdup_limbs (sp, size); |

900 | prod = refmpn_malloc_limbs (size); |

901 | carry_orig = carry; |

902 | |

903 | carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry); |

904 | |

905 | /* check by multiplying back */ |

906 | #if 0 |

907 | printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n", |

908 | size, divisor, carry_orig, carry); |

909 | mpn_trace("s",sp_copy,size); |

910 | mpn_trace("r",rp,size); |

911 | printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry)); |

912 | mpn_trace("p",prod,size); |

913 | #endif |

914 | ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig); |

915 | ASSERT (refmpn_cmp (prod, sp_orig, size) == 0); |

916 | free (sp_orig); |

917 | free (prod); |

918 | |

919 | return carry; |

920 | } |

921 | |

922 | mp_limb_t |

923 | refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor) |

924 | { |

925 | return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0)); |

926 | } |

927 | |

928 | |

929 | mp_limb_t |

930 | refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, |

931 | mp_limb_t carry) |

932 | { |

933 | mp_ptr p = refmpn_malloc_limbs (size); |

934 | carry = refmpn_divmod_1c (p, sp, size, divisor, carry); |

935 | free (p); |

936 | return carry; |

937 | } |

938 | |

939 | mp_limb_t |

940 | refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor) |

941 | { |

942 | return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0)); |

943 | } |

944 | |

945 | mp_limb_t |

946 | refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor, |

947 | mp_limb_t inverse) |

948 | { |

949 | ASSERT (divisor & GMP_NUMB_HIGHBIT); |

950 | ASSERT (inverse == refmpn_invert_limb (divisor)); |

951 | return refmpn_mod_1 (sp, size, divisor); |

952 | } |

953 | |

954 | /* This implementation will be rather slow, but has the advantage of being |

955 | in a different style than the libgmp versions. */ |

956 | mp_limb_t |

957 | refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n) |

958 | { |

959 | ASSERT ((GMP_NUMB_BITS % 4) == 0); |

960 | return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1); |

961 | } |

962 | |

963 | |

964 | mp_limb_t |

965 | refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize, |

966 | mp_srcptr sp, mp_size_t size, mp_limb_t divisor, |

967 | mp_limb_t carry) |

968 | { |

969 | mp_ptr z; |

970 | |

971 | z = refmpn_malloc_limbs (xsize); |

972 | refmpn_fill (z, xsize, CNST_LIMB(0)); |

973 | |

974 | carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry); |

975 | carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry); |

976 | |

977 | free (z); |

978 | return carry; |

979 | } |

980 | |

981 | mp_limb_t |

982 | refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize, |

983 | mp_srcptr sp, mp_size_t size, mp_limb_t divisor) |

984 | { |

985 | return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0)); |

986 | } |

987 | |

988 | mp_limb_t |

989 | refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize, |

990 | mp_srcptr sp, mp_size_t size, |

991 | mp_limb_t divisor, mp_limb_t inverse, unsigned shift) |

992 | { |

993 | ASSERT (size >= 0); |

994 | ASSERT (shift == refmpn_count_leading_zeros (divisor)); |

995 | ASSERT (inverse == refmpn_invert_limb (divisor << shift)); |

996 | |

997 | return refmpn_divrem_1 (rp, xsize, sp, size, divisor); |

998 | } |

999 | |

1000 | |

1001 | /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers |

1002 | paper, figure 8.1 m', where b=2^BITS_PER_MP_LIMB. Note that -d-1 < d |

1003 | since d has the high bit set. */ |

1004 | |

1005 | mp_limb_t |

1006 | refmpn_invert_limb (mp_limb_t d) |

1007 | { |

1008 | mp_limb_t r; |

1009 | ASSERT (d & GMP_LIMB_HIGHBIT); |

1010 | return refmpn_udiv_qrnnd (&r, -d-1, -1, d); |

1011 | } |

1012 | |

1013 | |

1014 | /* The aim is to produce a dst quotient and return a remainder c, satisfying |

1015 | c*b^n + src-i == 3*dst, where i is the incoming carry. |

1016 | |

1017 | Some value c==0, c==1 or c==2 will satisfy, so just try each. |

1018 | |

1019 | If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero |

1020 | remainder from the first division attempt determines the correct |

1021 | remainder (3-c), but don't bother with that, since we can't guarantee |

1022 | anything about GMP_NUMB_BITS when using nails. |

1023 | |

1024 | If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos |

1025 | complement negative, ie. b^n+a-i, and the calculation produces c1 |

1026 | satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This |

1027 | means it's enough to just add any borrow back at the end. |

1028 | |

1029 | A borrow only occurs when a==0 or a==1, and, by the same reasoning as in |

1030 | mpn/generic/diveby3.c, the c1 that results in those cases will only be 0 |

1031 | or 1 respectively, so with 1 added the final return value is still in the |

1032 | prescribed range 0 to 2. */ |

1033 | |

/* Exact division of {sp,size} minus the incoming carry by 3, quotient to
   {rp,size}, returning the out carry (0, 1 or 2).  See the comment block
   above for why trying each candidate carry works, and why a borrow from
   the initial subtraction can simply be added back at the end. */
mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr spcopy;
  mp_limb_t c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  /* work on a copy so sp itself is never clobbered */
  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  /* exactly one c in 0..2 makes the division come out with zero remainder */
  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, 3, c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  c += cs;   /* add back the borrow (if any) from the initial subtract */
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}

1060 | |

1061 | mp_limb_t |

1062 | refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size) |

1063 | { |

1064 | return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0)); |

1065 | } |

1066 | |

1067 | |

1068 | /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */ |

1069 | void |

1070 | refmpn_mul_basecase (mp_ptr prodp, |

1071 | mp_srcptr up, mp_size_t usize, |

1072 | mp_srcptr vp, mp_size_t vsize) |

1073 | { |

1074 | mp_size_t i; |

1075 | |

1076 | ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); |

1077 | ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); |

1078 | ASSERT (usize >= vsize); |

1079 | ASSERT (vsize >= 1); |

1080 | ASSERT_MPN (up, usize); |

1081 | ASSERT_MPN (vp, vsize); |

1082 | |

1083 | prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]); |

1084 | for (i = 1; i < vsize; i++) |

1085 | prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]); |

1086 | } |

1087 | |

1088 | void |

1089 | refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size) |

1090 | { |

1091 | refmpn_mul_basecase (prodp, up, size, vp, size); |

1092 | } |

1093 | |

1094 | void |

1095 | refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size) |

1096 | { |

1097 | refmpn_mul_basecase (dst, src, size, src, size); |

1098 | } |

1099 | |

1100 | /* Allowing usize<vsize, usize==0 or vsize==0. */ |

1101 | void |

1102 | refmpn_mul_any (mp_ptr prodp, |

1103 | mp_srcptr up, mp_size_t usize, |

1104 | mp_srcptr vp, mp_size_t vsize) |

1105 | { |

1106 | ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize)); |

1107 | ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize)); |

1108 | ASSERT (usize >= 0); |

1109 | ASSERT (vsize >= 0); |

1110 | ASSERT_MPN (up, usize); |

1111 | ASSERT_MPN (vp, vsize); |

1112 | |

1113 | if (usize == 0) |

1114 | { |

1115 | refmpn_fill (prodp, vsize, CNST_LIMB(0)); |

1116 | return; |

1117 | } |

1118 | |

1119 | if (vsize == 0) |

1120 | { |

1121 | refmpn_fill (prodp, usize, CNST_LIMB(0)); |

1122 | return; |

1123 | } |

1124 | |

1125 | if (usize >= vsize) |

1126 | refmpn_mul_basecase (prodp, up, usize, vp, vsize); |

1127 | else |

1128 | refmpn_mul_basecase (prodp, vp, vsize, up, usize); |

1129 | } |

1130 | |

1131 | |

/* gcd of {xp,xsize} and the single non-zero limb y.  Reduces x modulo y
   first, then runs a binary (shift-and-subtract) gcd on two limbs. */
mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t x;
  int twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));
  ASSERT_MPN (xp, xsize);
  ASSERT_LIMB (y);

  /* gcd(x,y) == gcd(x mod y, y), reducing to a single limb */
  x = refmpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;   /* y divides x exactly */

  /* strip common factors of 2, restored by the shift at the end */
  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  /* binary gcd: at the subtraction both x and y are odd, so x-y is even
     and the next iteration strips at least one bit, ensuring progress */
  for (;;)
    {
      while ((x & 1) == 0) x >>= 1;
      while ((y & 1) == 0) y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}

1170 | |

1171 | |

1172 | /* Based on the full limb x, not nails. */ |

1173 | unsigned |

1174 | refmpn_count_leading_zeros (mp_limb_t x) |

1175 | { |

1176 | unsigned n = 0; |

1177 | |

1178 | ASSERT (x != 0); |

1179 | |

1180 | while ((x & GMP_LIMB_HIGHBIT) == 0) |

1181 | { |

1182 | x <<= 1; |

1183 | n++; |

1184 | } |

1185 | return n; |

1186 | } |

1187 | |

1188 | /* Full limbs allowed, not limited to nails. */ |

1189 | unsigned |

1190 | refmpn_count_trailing_zeros (mp_limb_t x) |

1191 | { |

1192 | unsigned n = 0; |

1193 | |

1194 | ASSERT (x != 0); |

1195 | ASSERT_LIMB (x); |

1196 | |

1197 | while ((x & 1) == 0) |

1198 | { |

1199 | x >>= 1; |

1200 | n++; |

1201 | } |

1202 | return n; |

1203 | } |

1204 | |

1205 | /* Strip factors of two (low zero bits) from {p,size} by right shifting. |

1206 | The return value is the number of twos stripped. */ |

1207 | mp_size_t |

1208 | refmpn_strip_twos (mp_ptr p, mp_size_t size) |

1209 | { |

1210 | mp_size_t limbs; |

1211 | unsigned shift; |

1212 | |

1213 | ASSERT (size >= 1); |

1214 | ASSERT (! refmpn_zero_p (p, size)); |

1215 | ASSERT_MPN (p, size); |

1216 | |

1217 | for (limbs = 0; p[0] == 0; limbs++) |

1218 | { |

1219 | refmpn_copyi (p, p+1, size-1); |

1220 | p[size-1] = 0; |

1221 | } |

1222 | |

1223 | shift = refmpn_count_trailing_zeros (p[0]); |

1224 | if (shift) |

1225 | refmpn_rshift (p, p, size, shift); |

1226 | |

1227 | return limbs*GMP_NUMB_BITS + shift; |

1228 | } |

1229 | |

/* Subtraction-based gcd of {xp,xsize} and {yp,ysize}, result to gp,
   returning its size in limbs.  Both operand areas are clobbered as
   workspace.  Entry conditions (checked below) mirror what mpn_gcd
   requires: y odd and normalized, x >= y, x odd in its low limb. */
mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  /* make x odd and both operands normalized (no high zero limbs);
     stripped twos don't affect the gcd since y is odd */
  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  /* repeatedly subtract the smaller from the larger, re-oddifying the
     result, until the two values meet; that common value is the gcd */
  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;   /* NOTE: a size, though declared mp_limb_t as mpn_gcd is */
}

1270 | |

1271 | |

1272 | unsigned long |

1273 | refmpn_popcount (mp_srcptr sp, mp_size_t size) |

1274 | { |

1275 | unsigned long count = 0; |

1276 | mp_size_t i; |

1277 | int j; |

1278 | mp_limb_t l; |

1279 | |

1280 | ASSERT (size >= 0); |

1281 | ASSERT_MPN (sp, size); |

1282 | |

1283 | for (i = 0; i < size; i++) |

1284 | { |

1285 | l = sp[i]; |

1286 | for (j = 0; j < GMP_NUMB_BITS; j++) |

1287 | { |

1288 | count += (l & 1); |

1289 | l >>= 1; |

1290 | } |

1291 | } |

1292 | return count; |

1293 | } |

1294 | |

1295 | unsigned long |

1296 | refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size) |

1297 | { |

1298 | mp_ptr d; |

1299 | unsigned long count; |

1300 | |

1301 | ASSERT (size >= 0); |

1302 | ASSERT_MPN (s1p, size); |

1303 | ASSERT_MPN (s2p, size); |

1304 | |

1305 | if (size == 0) |

1306 | return 0; |

1307 | |

1308 | d = refmpn_malloc_limbs (size); |

1309 | refmpn_xor_n (d, s1p, s2p, size); |

1310 | count = refmpn_popcount (d, size); |

1311 | free (d); |

1312 | return count; |

1313 | } |

1314 | |

1315 | |

/* Set the 2-limb value r to a % d, by binary shift-and-subtract:
   the divisor is shifted up towards a, then shifted back down one bit at
   a time, subtracting whenever it fits. */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t D[2];
  int n;

  ASSERT (! refmpn_overlap_p (r, 2, d, 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  /* working copies; D is the shifted divisor, n its shift count */
  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  /* shift D up until it tops out or reaches/exceeds r */
  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, 2) <= 0)
        break;
      refmpn_lshift (D, D, 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  /* shift D back down, subtracting it from r wherever it fits */
  while (n >= 0)
    {
      if (refmpn_cmp (r, D, 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, 2));
      refmpn_rshift (D, D, 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, 2) < 0);
}

1352 | |

1353 | |

/* Find n where 0<n<2^GMP_NUMB_BITS and n==c mod m.
   NOTE(review): "m" is not a parameter; from the code, the second value is
   initialized to the 2-limb two's-complement negation of c, and the pair is
   reduced Euclid-style (mod2 then swap) until one value fits in a single
   limb -- presumably m = 2^(2*GMP_NUMB_BITS) here; confirm against
   mpn_gcd_finda's callers. */
mp_limb_t
refmpn_gcd_finda (const mp_limb_t c[2])
{
  mp_limb_t n1[2], n2[2];

  ASSERT (c[0] != 0);
  ASSERT (c[1] != 0);
  ASSERT_MPN (c, 2);

  n1[0] = c[0];
  n1[1] = c[1];

  /* n2 = -c as a 2-limb two's complement (c[0] != 0 so no carry needed) */
  n2[0] = -n1[0];
  n2[1] = ~n1[1];

  /* Euclidean reduction: replace the larger by its remainder and swap,
     until the current divisor fits in one limb */
  while (n2[1] != 0)
    {
      refmpn_mod2 (n1, n1, n2);

      MP_LIMB_T_SWAP (n1[0], n2[0]);
      MP_LIMB_T_SWAP (n1[1], n2[1]);
    }

  return n2[0];
}

1380 | |

1381 | |

/* Schoolbook division of {np,nsize} by the normalized divisor {dp,dsize},
   similar to mpn/generic/sb_divrem_mn.c but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big (it is then
   corrected by up to two add-backs).  Quotient of nsize-dsize limbs goes
   to qp, the remainder is left in the low dsize limbs of np, and the
   return value is the high quotient limb (0 or 1).  The result is
   verified by multiplying back against a saved copy of np. */
mp_limb_t
refmpn_sb_divrem_mn (mp_ptr qp,
                     mp_ptr np, mp_size_t nsize,
                     mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t retval = 0;
  mp_size_t i;
  mp_limb_t d1 = dp[dsize-1];
  mp_ptr np_orig = refmpn_memdup_limbs (np, nsize);   /* for the check below */

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_LIMB_HIGHBIT);   /* divisor must be normalized */
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  /* high quotient limb: 1 if the top dsize limbs of np already hold the
     divisor or more */
  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  /* one quotient limb per iteration, high to low */
  for (i--; i >= 0; i--)
    {
      mp_limb_t n0 = np[i+dsize];
      mp_limb_t n1 = np[i+dsize-1];
      mp_limb_t q, dummy_r;

      /* trial quotient from the top two numerator limbs and the top
         divisor limb; may be up to 2 too big */
      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = MP_LIMB_T_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1, d1);

      /* subtract q*divisor; if that went negative the trial quotient was
         too big: decrement q and add the divisor back, at most twice */
      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT (refmpn_add_n (np+i, np+i, dp, dsize) != 0);
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);

  /* multiply back to original */
  {
    mp_ptr mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}

1456 | |

/* Truncating division of {np,nsize} by {dp,dsize}: quotient to qp,
   remainder (dsize limbs) to rp.  qxn must be 0.  For multi-limb divisors
   the divisor is normalized (shifted so its high bit is set), the
   numerator shifted to match, sb_divrem_mn does the work and the
   remainder is shifted back down.
   (The previous comment here was a copy-paste of sb_divrem_mn's.) */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  if (dsize == 1)
    {
      /* single-limb divisor handled directly */
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      mp_ptr n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr d2p = refmpn_malloc_limbs (dsize);
      int norm = refmpn_count_leading_zeros (dp[dsize-1]);

      /* shift both operands up by norm; the numerator may grow a limb */
      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);
      /* un-normalize the remainder */
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}

1490 | |

/* Convert {src,size} to digits of the given base in dst (most significant
   digit first, values 0..base-1, not ASCII), returning the number of
   digits produced.  src is clobbered by the repeated divisions, except
   for power-of-2 bases where a copy is taken first. */
size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char *d;
  size_t dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (__mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  /* upper bound on the digit count, used to place the digits right-aligned */
  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  /* peel digits off the low end, filling dst from the right */
  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);   /* drop a high limb once it zeroes out */
    }
  while (size != 0);

  /* at most one leading zero */
  ASSERT (d == dst || d == dst+1);
  if (d != dst)
    *dst = 0;

  if (POW2_P (base))
    free (src);

  return dsize;
}

1537 | |

1538 | |

1539 | mp_limb_t |

1540 | refmpn_bswap_limb (mp_limb_t src) |

1541 | { |

1542 | mp_limb_t dst; |

1543 | int i; |

1544 | |

1545 | dst = 0; |

1546 | for (i = 0; i < BYTES_PER_MP_LIMB; i++) |

1547 | { |

1548 | dst = (dst << 8) + (src & 0xFF); |

1549 | src >>= 8; |

1550 | } |

1551 | return dst; |

1552 | } |

1553 | |

1554 | |

1555 | /* These random functions are mostly for transitional purposes while adding |

1556 | nail support, since they're independent of the normal mpn routines. They |

1557 | can probably be removed when those normal routines are reliable, though |

1558 | perhaps something independent would still be useful at times. */ |

1559 | |

1560 | #if BITS_PER_MP_LIMB == 32 |

1561 | #define RAND_A CNST_LIMB(0x29CF535) |

1562 | #endif |

1563 | #if BITS_PER_MP_LIMB == 64 |

1564 | #define RAND_A CNST_LIMB(0xBAECD515DAF0B49D) |

1565 | #endif |

1566 | |

1567 | mp_limb_t refmpn_random_seed; |

1568 | |

1569 | mp_limb_t |

1570 | refmpn_random_half (void) |

1571 | { |

1572 | refmpn_random_seed = refmpn_random_seed * RAND_A + 1; |

1573 | return (refmpn_random_seed >> BITS_PER_MP_LIMB/2); |

1574 | } |

1575 | |

1576 | mp_limb_t |

1577 | refmpn_random_limb (void) |

1578 | { |

1579 | return ((refmpn_random_half () << (BITS_PER_MP_LIMB/2)) |

1580 | | refmpn_random_half ()) & GMP_NUMB_MASK; |

1581 | } |

1582 | |

1583 | void |

1584 | refmpn_random (mp_ptr ptr, mp_size_t size) |

1585 | { |

1586 | mp_size_t i; |

1587 | if (GMP_NAIL_BITS == 0) |

1588 | { |

1589 | mpn_random (ptr, size); |

1590 | return; |

1591 | } |

1592 | |

1593 | for (i = 0; i < size; i++) |

1594 | ptr[i] = refmpn_random_limb (); |

1595 | } |

1596 | |

/* Fill {ptr,size} with random data consisting of long runs of ones and
   zeros (mpn_random2 style), starting at a random bit position in the
   high limb.  With no nails the normal mpn generator is used directly. */
void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t bit, mask, limb;
  int run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

/* maximum run length, in bits */
#define RUN_MODULUS 32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;     /* all-zeros or all-ones, flipped when a run ends */
  run = 0;      /* bits left in the current run */

  for (i = size-1; i >= 0; i--)
    {
      /* sweep "bit" down the limb, ORing it in while in a ones run */
      limb = 0;
      do
        {
          if (run == 0)
            {
              /* start a new run of 1..RUN_MODULUS bits, opposite polarity */
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;   /* restart at the top of the next limb */
    }
}

**Note:** See TracBrowser for help on using the repository browser.