#
source:
trunk/third/gmp/mpz/aorsmul_i.c
@
22254

Revision 22254, 7.1 KB checked in by ghudson, 19 years ago (diff) |
---|

Line | |
---|---|

1 | /* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple. |

2 | |

3 | THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS |

4 | ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR |

5 | COMPLETELY IN FUTURE GNU MP RELEASES. |

6 | |

7 | Copyright 2001, 2002, 2004 Free Software Foundation, Inc. |

8 | |

9 | This file is part of the GNU MP Library. |

10 | |

11 | The GNU MP Library is free software; you can redistribute it and/or modify |

12 | it under the terms of the GNU Lesser General Public License as published by |

13 | the Free Software Foundation; either version 2.1 of the License, or (at your |

14 | option) any later version. |

15 | |

16 | The GNU MP Library is distributed in the hope that it will be useful, but |

17 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |

18 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |

19 | License for more details. |

20 | |

21 | You should have received a copy of the GNU Lesser General Public License |

22 | along with the GNU MP Library; see the file COPYING.LIB. If not, write to |

23 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |

24 | MA 02111-1307, USA. */ |

25 | |

26 | #include "gmp.h" |

27 | #include "gmp-impl.h" |

28 | |

29 | |

30 | #if HAVE_NATIVE_mpn_mul_1c |

31 | #define MPN_MUL_1C(cout, dst, src, size, n, cin) \ |

32 | do { \ |

33 | (cout) = mpn_mul_1c (dst, src, size, n, cin); \ |

34 | } while (0) |

35 | #else |

36 | #define MPN_MUL_1C(cout, dst, src, size, n, cin) \ |

37 | do { \ |

38 | mp_limb_t __cy; \ |

39 | __cy = mpn_mul_1 (dst, src, size, n); \ |

40 | (cout) = __cy + mpn_add_1 (dst, dst, size, cin); \ |

41 | } while (0) |

42 | #endif |

43 | |

44 | |

45 | /* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y. |

46 | |

47 | All that's needed to account for negative w or x is to flip "sub". |

48 | |

49 | The final w will retain its sign, unless an underflow occurs in a submul |

50 | of absolute values, in which case it's flipped. |

51 | |

52 | If x has more limbs than w, then mpn_submul_1 followed by mpn_com_n is |

53 | used. The alternative would be mpn_mul_1 into temporary space followed |

54 | by mpn_sub_n. Avoiding temporary space seem good, and submul+com stands |

55 | a chance of being faster since it involves only one set of carry |

56 | propagations, not two. Note that doing an addmul_1 with a |

57 | twos-complement negative y doesn't work, because it effectively adds an |

58 | extra x * 2^BITS_PER_MP_LIMB. */ |

59 | |

60 | REGPARM_ATTR (1) void |

61 | mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub) |

62 | { |

63 | mp_size_t xsize, wsize, wsize_signed, new_wsize, min_size, dsize; |

64 | mp_srcptr xp; |

65 | mp_ptr wp; |

66 | mp_limb_t cy; |

67 | |

68 | /* w unaffected if x==0 or y==0 */ |

69 | xsize = SIZ (x); |

70 | if (xsize == 0 || y == 0) |

71 | return; |

72 | |

73 | sub ^= xsize; |

74 | xsize = ABS (xsize); |

75 | |

76 | wsize_signed = SIZ (w); |

77 | if (wsize_signed == 0) |

78 | { |

79 | /* nothing to add to, just set x*y, "sub" gives the sign */ |

80 | MPZ_REALLOC (w, xsize+1); |

81 | wp = PTR (w); |

82 | cy = mpn_mul_1 (wp, PTR(x), xsize, y); |

83 | wp[xsize] = cy; |

84 | xsize += (cy != 0); |

85 | SIZ (w) = (sub >= 0 ? xsize : -xsize); |

86 | return; |

87 | } |

88 | |

89 | sub ^= wsize_signed; |

90 | wsize = ABS (wsize_signed); |

91 | |

92 | new_wsize = MAX (wsize, xsize); |

93 | MPZ_REALLOC (w, new_wsize+1); |

94 | wp = PTR (w); |

95 | xp = PTR (x); |

96 | min_size = MIN (wsize, xsize); |

97 | |

98 | if (sub >= 0) |

99 | { |

100 | /* addmul of absolute values */ |

101 | |

102 | cy = mpn_addmul_1 (wp, xp, min_size, y); |

103 | wp += min_size; |

104 | xp += min_size; |

105 | |

106 | dsize = xsize - wsize; |

107 | #if HAVE_NATIVE_mpn_mul_1c |

108 | if (dsize > 0) |

109 | cy = mpn_mul_1c (wp, xp, dsize, y, cy); |

110 | else if (dsize < 0) |

111 | { |

112 | dsize = -dsize; |

113 | cy = mpn_add_1 (wp, wp, dsize, cy); |

114 | } |

115 | #else |

116 | if (dsize != 0) |

117 | { |

118 | mp_limb_t cy2; |

119 | if (dsize > 0) |

120 | cy2 = mpn_mul_1 (wp, xp, dsize, y); |

121 | else |

122 | { |

123 | dsize = -dsize; |

124 | cy2 = 0; |

125 | } |

126 | cy = cy2 + mpn_add_1 (wp, wp, dsize, cy); |

127 | } |

128 | #endif |

129 | |

130 | wp[dsize] = cy; |

131 | new_wsize += (cy != 0); |

132 | } |

133 | else |

134 | { |

135 | /* submul of absolute values */ |

136 | |

137 | cy = mpn_submul_1 (wp, xp, min_size, y); |

138 | if (wsize >= xsize) |

139 | { |

140 | /* if w bigger than x, then propagate borrow through it */ |

141 | if (wsize != xsize) |

142 | cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy); |

143 | |

144 | if (cy != 0) |

145 | { |

146 | /* Borrow out of w, take twos complement negative to get |

147 | absolute value, flip sign of w. */ |

148 | wp[new_wsize] = ~-cy; /* extra limb is 0-cy */ |

149 | mpn_com_n (wp, wp, new_wsize); |

150 | new_wsize++; |

151 | MPN_INCR_U (wp, new_wsize, CNST_LIMB(1)); |

152 | wsize_signed = -wsize_signed; |

153 | } |

154 | } |

155 | else /* wsize < xsize */ |

156 | { |

157 | /* x bigger than w, so want x*y-w. Submul has given w-x*y, so |

158 | take twos complement and use an mpn_mul_1 for the rest. */ |

159 | |

160 | mp_limb_t cy2; |

161 | |

162 | /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */ |

163 | mpn_com_n (wp, wp, wsize); |

164 | cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1)); |

165 | cy -= 1; |

166 | |

167 | /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never |

168 | returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */ |

169 | cy2 = (cy == MP_LIMB_T_MAX); |

170 | cy += cy2; |

171 | MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy); |

172 | wp[new_wsize] = cy; |

173 | new_wsize += (cy != 0); |

174 | |

175 | /* Apply any -1 from above. The value at wp+wsize is non-zero |

176 | because y!=0 and the high limb of x will be non-zero. */ |

177 | if (cy2) |

178 | MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1)); |

179 | |

180 | wsize_signed = -wsize_signed; |

181 | } |

182 | |

183 | /* submul can produce high zero limbs due to cancellation, both when w |

184 | has more limbs or x has more */ |

185 | MPN_NORMALIZE (wp, new_wsize); |

186 | } |

187 | |

188 | SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize); |

189 | |

190 | ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0); |

191 | } |

192 | |

193 | |

194 | void |

195 | mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y) |

196 | { |

197 | mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0); |

198 | #if GMP_NAIL_BITS != 0 |

199 | if (y > GMP_NUMB_MAX && SIZ(x) != 0) |

200 | { |

201 | mpz_t t; |

202 | mp_ptr tp; |

203 | mp_size_t xn; |

204 | TMP_DECL (mark); |

205 | TMP_MARK (mark); |

206 | xn = SIZ (x); |

207 | MPZ_TMP_INIT (t, ABS (xn) + 1); |

208 | tp = PTR (t); |

209 | tp[0] = 0; |

210 | MPN_COPY (tp + 1, PTR(x), ABS (xn)); |

211 | SIZ(t) = xn >= 0 ? xn + 1 : xn - 1; |

212 | mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0); |

213 | TMP_FREE (mark); |

214 | } |

215 | #endif |

216 | } |

217 | |

218 | void |

219 | mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y) |

220 | { |

221 | mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1); |

222 | #if GMP_NAIL_BITS != 0 |

223 | if (y > GMP_NUMB_MAX && SIZ(x) != 0) |

224 | { |

225 | mpz_t t; |

226 | mp_ptr tp; |

227 | mp_size_t xn; |

228 | TMP_DECL (mark); |

229 | TMP_MARK (mark); |

230 | xn = SIZ (x); |

231 | MPZ_TMP_INIT (t, ABS (xn) + 1); |

232 | tp = PTR (t); |

233 | tp[0] = 0; |

234 | MPN_COPY (tp + 1, PTR(x), ABS (xn)); |

235 | SIZ(t) = xn >= 0 ? xn + 1 : xn - 1; |

236 | mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1); |

237 | TMP_FREE (mark); |

238 | } |

239 | #endif |

240 | } |

**Note:**See TracBrowser for help on using the repository browser.