1 | /* Speed measuring program. |
---|
2 | |
---|
3 | Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
---|
4 | |
---|
5 | This file is part of the GNU MP Library. |
---|
6 | |
---|
7 | The GNU MP Library is free software; you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU Lesser General Public License as published by |
---|
9 | the Free Software Foundation; either version 2.1 of the License, or (at your |
---|
10 | option) any later version. |
---|
11 | |
---|
12 | The GNU MP Library is distributed in the hope that it will be useful, but |
---|
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
---|
15 | License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Lesser General Public License |
---|
18 | along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
---|
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
---|
20 | MA 02111-1307, USA. */ |
---|
21 | |
---|
22 | /* Usage message is in the code below, run with no arguments to print it. |
---|
23 | See README for interesting applications. |
---|
24 | |
---|
25 | To add a new routine foo(), create a speed_foo() function in the style of |
---|
26 | the existing ones and add an entry in the routine[] array. Put FLAG_R if |
---|
27 | speed_foo() wants an "r" parameter. |
---|
28 | |
---|
29 | The routines don't have help messages or descriptions, but most have |
---|
30 | suggestive names. See the source code for full details. |
---|
31 | |
---|
32 | */ |
---|
33 | |
---|
34 | #include "config.h" |
---|
35 | |
---|
36 | #include <limits.h> |
---|
37 | #include <stdio.h> |
---|
38 | #include <stdlib.h> |
---|
39 | #include <string.h> |
---|
40 | |
---|
41 | #if HAVE_UNISTD_H |
---|
42 | #include <unistd.h> /* for getpid, R_OK */ |
---|
43 | #endif |
---|
44 | |
---|
45 | #if TIME_WITH_SYS_TIME |
---|
46 | # include <sys/time.h> /* for struct timeval */ |
---|
47 | # include <time.h> |
---|
48 | #else |
---|
49 | # if HAVE_SYS_TIME_H |
---|
50 | # include <sys/time.h> |
---|
51 | # else |
---|
52 | # include <time.h> |
---|
53 | # endif |
---|
54 | #endif |
---|
55 | |
---|
56 | #if HAVE_SYS_RESOURCE_H |
---|
57 | #include <sys/resource.h> /* for getrusage() */ |
---|
58 | #endif |
---|
59 | |
---|
60 | |
---|
61 | #include "gmp.h" |
---|
62 | #include "gmp-impl.h" |
---|
63 | #include "longlong.h" /* for the benefit of speed-many.c */ |
---|
64 | #include "tests.h" |
---|
65 | #include "speed.h" |
---|
66 | |
---|
67 | |
---|
68 | #if !HAVE_DECL_OPTARG |
---|
69 | extern char *optarg; |
---|
70 | extern int optind, opterr; |
---|
71 | #endif |
---|
72 | |
---|
73 | #if !HAVE_STRTOUL |
---|
74 | #define strtoul(p,e,b) (unsigned long) strtol(p,e,b) |
---|
75 | #endif |
---|
76 | |
---|
77 | #ifdef SPEED_EXTRA_PROTOS |
---|
78 | SPEED_EXTRA_PROTOS |
---|
79 | #endif |
---|
80 | #ifdef SPEED_EXTRA_PROTOS2 |
---|
81 | SPEED_EXTRA_PROTOS2 |
---|
82 | #endif |
---|
83 | |
---|
84 | |
---|
85 | #define MPN_FILL(ptr, size, n) \ |
---|
86 | do { \ |
---|
87 | mp_size_t __i; \ |
---|
88 | ASSERT ((size) >= 0); \ |
---|
89 | for (__i = 0; __i < (size); __i++) \ |
---|
90 | (ptr)[__i] = (n); \ |
---|
91 | } while (0) |
---|
92 | |
---|
93 | |
---|
94 | #if BITS_PER_MP_LIMB == 32 |
---|
95 | #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) |
---|
96 | #endif |
---|
97 | #if BITS_PER_MP_LIMB == 64 |
---|
98 | #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) |
---|
99 | #endif |
---|
100 | |
---|
101 | |
---|
102 | #define CMP_ABSOLUTE 1 |
---|
103 | #define CMP_RATIO 2 |
---|
104 | #define CMP_DIFFERENCE 3 |
---|
105 | #define CMP_DIFFPREV 4 |
---|
106 | int option_cmp = CMP_ABSOLUTE; |
---|
107 | |
---|
108 | #define UNIT_SECONDS 1 |
---|
109 | #define UNIT_CYCLES 2 |
---|
110 | #define UNIT_CYCLESPERLIMB 3 |
---|
111 | int option_unit = UNIT_SECONDS; |
---|
112 | |
---|
113 | #define DATA_RANDOM 1 |
---|
114 | #define DATA_RANDOM2 2 |
---|
115 | #define DATA_ZEROS 3 |
---|
116 | #define DATA_AAS 4 |
---|
117 | #define DATA_FFS 5 |
---|
118 | #define DATA_2FD 6 |
---|
119 | int option_data = DATA_RANDOM; |
---|
120 | |
---|
121 | int option_square = 0; |
---|
122 | double option_factor = 0.0; |
---|
123 | mp_size_t option_step = 1; |
---|
124 | int option_gnuplot = 0; |
---|
125 | char *option_gnuplot_basename; |
---|
126 | struct size_array_t { |
---|
127 | mp_size_t start, end; |
---|
128 | } *size_array = NULL; |
---|
129 | mp_size_t size_num = 0; |
---|
130 | mp_size_t size_allocnum = 0; |
---|
131 | int option_resource_usage = 0; |
---|
132 | long option_seed = 123456789; |
---|
133 | |
---|
134 | struct speed_params sp; |
---|
135 | |
---|
136 | #define COLUMN_WIDTH 13 /* for the free-form output */ |
---|
137 | |
---|
138 | #define FLAG_R (1<<0) /* require ".r" */ |
---|
139 | #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ |
---|
140 | #define FLAG_RSIZE (1<<2) |
---|
141 | #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ |
---|
142 | |
---|
143 | const struct routine_t { |
---|
144 | /* constants */ |
---|
145 | const char *name; |
---|
146 | speed_function_t fun; |
---|
147 | int flag; |
---|
148 | |
---|
149 | } routine[] = { |
---|
150 | |
---|
151 | { "noop", speed_noop }, |
---|
152 | { "noop_wxs", speed_noop_wxs }, |
---|
153 | { "noop_wxys", speed_noop_wxys }, |
---|
154 | |
---|
155 | { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, |
---|
156 | { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, |
---|
157 | |
---|
158 | { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, |
---|
159 | { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, |
---|
160 | { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, |
---|
161 | { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, |
---|
162 | #if HAVE_NATIVE_mpn_mul_2 |
---|
163 | { "mpn_mul_2", speed_mpn_mul_2, FLAG_R }, |
---|
164 | #endif |
---|
165 | |
---|
166 | { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, |
---|
167 | { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, |
---|
168 | #if HAVE_NATIVE_mpn_divrem_1c |
---|
169 | { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, |
---|
170 | { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, |
---|
171 | #endif |
---|
172 | { "mpn_mod_1", speed_mpn_mod_1, FLAG_R }, |
---|
173 | #if HAVE_NATIVE_mpn_mod_1c |
---|
174 | { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R }, |
---|
175 | #endif |
---|
176 | { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, |
---|
177 | { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, |
---|
178 | { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, |
---|
179 | |
---|
180 | { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, |
---|
181 | { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, |
---|
182 | { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, |
---|
183 | { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, |
---|
184 | { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, |
---|
185 | { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, |
---|
186 | |
---|
187 | { "mpn_divrem_2", speed_mpn_divrem_2, }, |
---|
188 | { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, |
---|
189 | { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, |
---|
190 | |
---|
191 | { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, |
---|
192 | { "mpn_divexact_by3", speed_mpn_divexact_by3 }, |
---|
193 | |
---|
194 | #if HAVE_NATIVE_mpn_modexact_1c_odd |
---|
195 | { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, |
---|
196 | #endif |
---|
197 | { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, |
---|
198 | |
---|
199 | { "mpn_dc_tdiv_qr", speed_mpn_dc_tdiv_qr }, |
---|
200 | { "mpn_dc_divrem_n", speed_mpn_dc_divrem_n }, |
---|
201 | { "mpn_dc_divrem_sb", speed_mpn_dc_divrem_sb }, |
---|
202 | { "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div }, |
---|
203 | { "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv }, |
---|
204 | |
---|
205 | { "mpn_sb_divrem_m3", speed_mpn_sb_divrem_m3 }, |
---|
206 | { "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div }, |
---|
207 | { "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv }, |
---|
208 | |
---|
209 | { "mpn_lshift", speed_mpn_lshift, FLAG_R }, |
---|
210 | { "mpn_rshift", speed_mpn_rshift, FLAG_R }, |
---|
211 | |
---|
212 | { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, |
---|
213 | { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, |
---|
214 | { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, |
---|
215 | { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, |
---|
216 | { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, |
---|
217 | { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, |
---|
218 | { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, |
---|
219 | { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, |
---|
220 | { "mpn_com_n", speed_mpn_com_n }, |
---|
221 | |
---|
222 | { "mpn_popcount", speed_mpn_popcount }, |
---|
223 | { "mpn_hamdist", speed_mpn_hamdist }, |
---|
224 | |
---|
225 | { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, |
---|
226 | { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, |
---|
227 | |
---|
228 | { "mpn_gcd", speed_mpn_gcd }, |
---|
229 | { "mpn_gcd_binary", speed_mpn_gcd_binary }, |
---|
230 | { "find_a", speed_find_a, FLAG_NODATA }, |
---|
231 | #if HAVE_NATIVE_mpn_gcd_finda |
---|
232 | { "mpn_gcd_finda", speed_mpn_gcd_finda, FLAG_NODATA }, |
---|
233 | #endif |
---|
234 | |
---|
235 | { "mpn_gcdext", speed_mpn_gcdext }, |
---|
236 | { "mpn_gcdext_single", speed_mpn_gcdext_single }, |
---|
237 | { "mpn_gcdext_double", speed_mpn_gcdext_double }, |
---|
238 | { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, |
---|
239 | { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, |
---|
240 | |
---|
241 | { "mpz_jacobi", speed_mpz_jacobi }, |
---|
242 | { "mpn_jacobi_base", speed_mpn_jacobi_base }, |
---|
243 | { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, |
---|
244 | { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, |
---|
245 | { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, |
---|
246 | |
---|
247 | { "mpn_mul_basecase", speed_mpn_mul_basecase, FLAG_R_OPTIONAL }, |
---|
248 | { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, |
---|
249 | #if HAVE_NATIVE_mpn_sqr_diagonal |
---|
250 | { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, |
---|
251 | #endif |
---|
252 | |
---|
253 | { "mpn_mul_n", speed_mpn_mul_n }, |
---|
254 | { "mpn_sqr_n", speed_mpn_sqr_n }, |
---|
255 | |
---|
256 | { "mpn_kara_mul_n", speed_mpn_kara_mul_n }, |
---|
257 | { "mpn_kara_sqr_n", speed_mpn_kara_sqr_n }, |
---|
258 | { "mpn_toom3_mul_n", speed_mpn_toom3_mul_n }, |
---|
259 | { "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n }, |
---|
260 | { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, |
---|
261 | { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, |
---|
262 | |
---|
263 | { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, |
---|
264 | { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, |
---|
265 | |
---|
266 | { "mpn_toom3_mul_n_mpn", speed_mpn_toom3_mul_n_mpn }, |
---|
267 | { "mpn_toom3_mul_n_open", speed_mpn_toom3_mul_n_open }, |
---|
268 | { "mpn_toom3_sqr_n_mpn", speed_mpn_toom3_sqr_n_mpn }, |
---|
269 | { "mpn_toom3_sqr_n_open", speed_mpn_toom3_sqr_n_open }, |
---|
270 | |
---|
271 | { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, |
---|
272 | |
---|
273 | { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, |
---|
274 | { "mpn_set_str_basecase", speed_mpn_set_str_basecase, FLAG_R_OPTIONAL }, |
---|
275 | { "mpn_set_str_subquad", speed_mpn_set_str_subquad, FLAG_R_OPTIONAL }, |
---|
276 | |
---|
277 | { "mpn_sqrtrem", speed_mpn_sqrtrem }, |
---|
278 | |
---|
279 | { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, |
---|
280 | { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, |
---|
281 | { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, |
---|
282 | { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, |
---|
283 | { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, |
---|
284 | |
---|
285 | { "mpz_add", speed_mpz_add }, |
---|
286 | { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, |
---|
287 | { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, |
---|
288 | { "mpz_powm", speed_mpz_powm }, |
---|
289 | { "mpz_powm_mod", speed_mpz_powm_mod }, |
---|
290 | { "mpz_powm_redc", speed_mpz_powm_redc }, |
---|
291 | { "mpz_powm_ui", speed_mpz_powm_ui }, |
---|
292 | |
---|
293 | { "mpz_mod", speed_mpz_mod }, |
---|
294 | { "redc", speed_redc }, |
---|
295 | |
---|
296 | { "MPN_COPY", speed_MPN_COPY }, |
---|
297 | { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, |
---|
298 | { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, |
---|
299 | { "memcpy", speed_memcpy }, |
---|
300 | #if HAVE_NATIVE_mpn_copyi |
---|
301 | { "mpn_copyi", speed_mpn_copyi }, |
---|
302 | #endif |
---|
303 | #if HAVE_NATIVE_mpn_copyd |
---|
304 | { "mpn_copyd", speed_mpn_copyd }, |
---|
305 | #endif |
---|
306 | |
---|
307 | { "MPN_ZERO", speed_MPN_ZERO }, |
---|
308 | |
---|
309 | { "modlimb_invert", speed_modlimb_invert, FLAG_NODATA }, |
---|
310 | { "modlimb_invert_mul1", speed_modlimb_invert_mul1, FLAG_NODATA }, |
---|
311 | { "modlimb_invert_loop", speed_modlimb_invert_loop, FLAG_NODATA }, |
---|
312 | { "modlimb_invert_cond", speed_modlimb_invert_cond, FLAG_NODATA }, |
---|
313 | { "modlimb_invert_arith", speed_modlimb_invert_arith, FLAG_NODATA }, |
---|
314 | |
---|
315 | { "malloc_free", speed_malloc_free }, |
---|
316 | { "malloc_realloc_free", speed_malloc_realloc_free }, |
---|
317 | { "gmp_allocate_free", speed_gmp_allocate_free }, |
---|
318 | { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, |
---|
319 | { "mpz_init_clear", speed_mpz_init_clear }, |
---|
320 | { "mpq_init_clear", speed_mpq_init_clear }, |
---|
321 | { "mpf_init_clear", speed_mpf_init_clear }, |
---|
322 | { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, |
---|
323 | |
---|
324 | { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, |
---|
325 | #if HAVE_NATIVE_mpn_umul_ppmm |
---|
326 | { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, |
---|
327 | #endif |
---|
328 | |
---|
329 | { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, |
---|
330 | { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, |
---|
331 | |
---|
332 | { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, |
---|
333 | { "udiv_qrnnd_preinv", speed_udiv_qrnnd_preinv, FLAG_R_OPTIONAL }, |
---|
334 | { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL }, |
---|
335 | { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, |
---|
336 | #if HAVE_NATIVE_mpn_udiv_qrnnd |
---|
337 | { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, |
---|
338 | #endif |
---|
339 | { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, |
---|
340 | |
---|
341 | { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, |
---|
342 | { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, |
---|
343 | |
---|
344 | #ifdef SPEED_EXTRA_ROUTINES |
---|
345 | SPEED_EXTRA_ROUTINES |
---|
346 | #endif |
---|
347 | #ifdef SPEED_EXTRA_ROUTINES2 |
---|
348 | SPEED_EXTRA_ROUTINES2 |
---|
349 | #endif |
---|
350 | }; |
---|
351 | |
---|
352 | |
---|
353 | struct choice_t { |
---|
354 | const struct routine_t *p; |
---|
355 | mp_limb_t r; |
---|
356 | double scale; |
---|
357 | double time; |
---|
358 | int no_time; |
---|
359 | double prev_time; |
---|
360 | const char *name; |
---|
361 | }; |
---|
362 | struct choice_t *choice; |
---|
363 | int num_choices = 0; |
---|
364 | |
---|
365 | |
---|
366 | void |
---|
367 | data_fill (mp_ptr ptr, mp_size_t size) |
---|
368 | { |
---|
369 | switch (option_data) { |
---|
370 | case DATA_RANDOM: |
---|
371 | mpn_random (ptr, size); |
---|
372 | break; |
---|
373 | case DATA_RANDOM2: |
---|
374 | mpn_random2 (ptr, size); |
---|
375 | break; |
---|
376 | case DATA_ZEROS: |
---|
377 | MPN_ZERO (ptr, size); |
---|
378 | break; |
---|
379 | case DATA_AAS: |
---|
380 | MPN_FILL (ptr, size, GMP_NUMB_0xAA); |
---|
381 | break; |
---|
382 | case DATA_FFS: |
---|
383 | MPN_FILL (ptr, size, GMP_NUMB_MAX); |
---|
384 | break; |
---|
385 | case DATA_2FD: |
---|
386 | MPN_FILL (ptr, size, GMP_NUMB_MAX); |
---|
387 | ptr[0] -= 2; |
---|
388 | break; |
---|
389 | default: |
---|
390 | abort(); |
---|
391 | /*NOTREACHED*/ |
---|
392 | } |
---|
393 | } |
---|
394 | |
---|
395 | /* The code here handling the various combinations of output options isn't |
---|
396 | too attractive, but it works and is fairly clean. */ |
---|
397 | |
---|
398 | #define SIZE_TO_DIVISOR(n) \ |
---|
399 | (option_square == 1 ? (n)*(n) \ |
---|
400 | : option_square == 2 ? (n)*((n)+1)/2 \ |
---|
401 | : (n)) |
---|
402 | |
---|
403 | void |
---|
404 | run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) |
---|
405 | { |
---|
406 | const char *first_open_fastest, *first_open_notfastest, *first_close; |
---|
407 | int i, fastest, want_data; |
---|
408 | double fastest_time; |
---|
409 | TMP_DECL (marker); |
---|
410 | |
---|
411 | TMP_MARK (marker); |
---|
412 | |
---|
413 | /* allocate data, unless all routines are NODATA */ |
---|
414 | want_data = 0; |
---|
415 | for (i = 0; i < num_choices; i++) |
---|
416 | want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); |
---|
417 | |
---|
418 | if (want_data) |
---|
419 | { |
---|
420 | sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp); |
---|
421 | sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp); |
---|
422 | |
---|
423 | data_fill (s->xp, s->size); |
---|
424 | data_fill (s->yp, s->size); |
---|
425 | } |
---|
426 | else |
---|
427 | { |
---|
428 | sp.xp = NULL; |
---|
429 | sp.yp = NULL; |
---|
430 | } |
---|
431 | |
---|
432 | if (prev_size == -1 && option_cmp == CMP_DIFFPREV) |
---|
433 | { |
---|
434 | first_open_fastest = "(#"; |
---|
435 | first_open_notfastest = " ("; |
---|
436 | first_close = ")"; |
---|
437 | } |
---|
438 | else |
---|
439 | { |
---|
440 | first_open_fastest = "#"; |
---|
441 | first_open_notfastest = " "; |
---|
442 | first_close = ""; |
---|
443 | } |
---|
444 | |
---|
445 | fastest = -1; |
---|
446 | fastest_time = -1.0; |
---|
447 | for (i = 0; i < num_choices; i++) |
---|
448 | { |
---|
449 | s->r = choice[i].r; |
---|
450 | choice[i].time = speed_measure (choice[i].p->fun, s); |
---|
451 | choice[i].no_time = (choice[i].time == -1.0); |
---|
452 | choice[i].time *= choice[i].scale; |
---|
453 | |
---|
454 | /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time |
---|
455 | is before any differences. */ |
---|
456 | { |
---|
457 | double t; |
---|
458 | t = choice[i].time; |
---|
459 | if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) |
---|
460 | { |
---|
461 | if (choice[i].prev_time == -1.0) |
---|
462 | choice[i].no_time = 1; |
---|
463 | else |
---|
464 | choice[i].time = choice[i].time - choice[i].prev_time; |
---|
465 | } |
---|
466 | choice[i].prev_time = t; |
---|
467 | } |
---|
468 | |
---|
469 | if (choice[i].no_time) |
---|
470 | continue; |
---|
471 | |
---|
472 | /* Look for the fastest after CMP_DIFFPREV has been applied, but |
---|
473 | before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown |
---|
474 | if there's more than one routine. */ |
---|
475 | if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) |
---|
476 | { |
---|
477 | fastest = i; |
---|
478 | fastest_time = choice[i].time; |
---|
479 | } |
---|
480 | |
---|
481 | if (option_cmp == CMP_DIFFPREV) |
---|
482 | { |
---|
483 | /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ |
---|
484 | if (option_unit == UNIT_CYCLES) |
---|
485 | choice[i].time /= speed_cycletime; |
---|
486 | else if (option_unit == UNIT_CYCLESPERLIMB) |
---|
487 | { |
---|
488 | if (prev_size == -1) |
---|
489 | choice[i].time /= speed_cycletime; |
---|
490 | else |
---|
491 | choice[i].time /= (speed_cycletime |
---|
492 | * (SIZE_TO_DIVISOR(s->size) |
---|
493 | - SIZE_TO_DIVISOR(prev_size))); |
---|
494 | } |
---|
495 | } |
---|
496 | else |
---|
497 | { |
---|
498 | if (option_unit == UNIT_CYCLES) |
---|
499 | choice[i].time /= speed_cycletime; |
---|
500 | else if (option_unit == UNIT_CYCLESPERLIMB) |
---|
501 | choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); |
---|
502 | |
---|
503 | if (option_cmp == CMP_RATIO && i > 0) |
---|
504 | { |
---|
505 | /* A ratio isn't affected by the units chosen. */ |
---|
506 | if (choice[0].no_time || choice[0].time == 0.0) |
---|
507 | choice[i].no_time = 1; |
---|
508 | else |
---|
509 | choice[i].time /= choice[0].time; |
---|
510 | } |
---|
511 | else if (option_cmp == CMP_DIFFERENCE && i > 0) |
---|
512 | { |
---|
513 | if (choice[0].no_time) |
---|
514 | { |
---|
515 | choice[i].no_time = 1; |
---|
516 | continue; |
---|
517 | } |
---|
518 | choice[i].time -= choice[0].time; |
---|
519 | } |
---|
520 | } |
---|
521 | } |
---|
522 | |
---|
523 | if (option_gnuplot) |
---|
524 | { |
---|
525 | /* In CMP_DIFFPREV, don't print anything for the first size, start |
---|
526 | with the second where an actual difference is available. |
---|
527 | |
---|
528 | In CMP_RATIO, print the first column as 1.0. |
---|
529 | |
---|
530 | The 9 decimals printed is much more than the expected precision of |
---|
531 | the measurements actually. */ |
---|
532 | |
---|
533 | if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) |
---|
534 | { |
---|
535 | fprintf (fp, "%-6ld ", s->size); |
---|
536 | for (i = 0; i < num_choices; i++) |
---|
537 | fprintf (fp, " %.9e", |
---|
538 | choice[i].no_time ? 0.0 |
---|
539 | : (option_cmp == CMP_RATIO && i == 0) ? 1.0 |
---|
540 | : choice[i].time); |
---|
541 | fprintf (fp, "\n"); |
---|
542 | } |
---|
543 | } |
---|
544 | else |
---|
545 | { |
---|
546 | fprintf (fp, "%-6ld ", s->size); |
---|
547 | for (i = 0; i < num_choices; i++) |
---|
548 | { |
---|
549 | char buf[128]; |
---|
550 | int decimals; |
---|
551 | |
---|
552 | if (choice[i].no_time) |
---|
553 | decimals = 0, choice[i].time = 0.0; |
---|
554 | else if (option_unit == UNIT_CYCLESPERLIMB |
---|
555 | || (option_cmp == CMP_RATIO && i > 0)) |
---|
556 | decimals = 4; |
---|
557 | else if (option_unit == UNIT_CYCLES) |
---|
558 | decimals = 2; |
---|
559 | else |
---|
560 | decimals = 9; |
---|
561 | |
---|
562 | sprintf (buf, "%s%.*f%s", |
---|
563 | i == fastest ? first_open_fastest : first_open_notfastest, |
---|
564 | decimals, choice[i].time, first_close); |
---|
565 | fprintf (fp, " %*s", COLUMN_WIDTH, buf); |
---|
566 | } |
---|
567 | fprintf (fp, "\n"); |
---|
568 | } |
---|
569 | |
---|
570 | TMP_FREE (marker); |
---|
571 | } |
---|
572 | |
---|
573 | void |
---|
574 | run_all (FILE *fp) |
---|
575 | { |
---|
576 | mp_size_t prev_size; |
---|
577 | int i; |
---|
578 | TMP_DECL (marker); |
---|
579 | |
---|
580 | TMP_MARK (marker); |
---|
581 | sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp); |
---|
582 | sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp); |
---|
583 | |
---|
584 | data_fill (sp.xp_block, SPEED_BLOCK_SIZE); |
---|
585 | data_fill (sp.yp_block, SPEED_BLOCK_SIZE); |
---|
586 | |
---|
587 | for (i = 0; i < size_num; i++) |
---|
588 | { |
---|
589 | sp.size = size_array[i].start; |
---|
590 | prev_size = -1; |
---|
591 | for (;;) |
---|
592 | { |
---|
593 | mp_size_t step; |
---|
594 | |
---|
595 | if (option_data == DATA_2FD && sp.size >= 2) |
---|
596 | sp.xp[sp.size-1] = 2; |
---|
597 | |
---|
598 | run_one (fp, &sp, prev_size); |
---|
599 | prev_size = sp.size; |
---|
600 | |
---|
601 | if (option_data == DATA_2FD && sp.size >= 2) |
---|
602 | sp.xp[sp.size-1] = MP_LIMB_T_MAX; |
---|
603 | |
---|
604 | if (option_factor != 0.0) |
---|
605 | { |
---|
606 | step = (mp_size_t) (sp.size * option_factor - sp.size); |
---|
607 | if (step < 1) |
---|
608 | step = 1; |
---|
609 | } |
---|
610 | else |
---|
611 | step = 1; |
---|
612 | if (step < option_step) |
---|
613 | step = option_step; |
---|
614 | |
---|
615 | sp.size += step; |
---|
616 | if (sp.size > size_array[i].end) |
---|
617 | break; |
---|
618 | } |
---|
619 | } |
---|
620 | |
---|
621 | TMP_FREE (marker); |
---|
622 | } |
---|
623 | |
---|
624 | |
---|
625 | FILE * |
---|
626 | fopen_for_write (const char *filename) |
---|
627 | { |
---|
628 | FILE *fp; |
---|
629 | if ((fp = fopen (filename, "w")) == NULL) |
---|
630 | { |
---|
631 | fprintf (stderr, "Cannot create %s\n", filename); |
---|
632 | exit(1); |
---|
633 | } |
---|
634 | return fp; |
---|
635 | } |
---|
636 | |
---|
637 | void |
---|
638 | fclose_written (FILE *fp, const char *filename) |
---|
639 | { |
---|
640 | int err; |
---|
641 | |
---|
642 | err = ferror (fp); |
---|
643 | err |= fclose (fp); |
---|
644 | |
---|
645 | if (err) |
---|
646 | { |
---|
647 | fprintf (stderr, "Error writing %s\n", filename); |
---|
648 | exit(1); |
---|
649 | } |
---|
650 | } |
---|
651 | |
---|
652 | |
---|
653 | void |
---|
654 | run_gnuplot (int argc, char *argv[]) |
---|
655 | { |
---|
656 | char *plot_filename; |
---|
657 | char *data_filename; |
---|
658 | FILE *fp; |
---|
659 | int i; |
---|
660 | |
---|
661 | plot_filename = (char *) (*__gmp_allocate_func) |
---|
662 | (strlen (option_gnuplot_basename) + 20); |
---|
663 | data_filename = (char *) (*__gmp_allocate_func) |
---|
664 | (strlen (option_gnuplot_basename) + 20); |
---|
665 | |
---|
666 | sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); |
---|
667 | sprintf (data_filename, "%s.data", option_gnuplot_basename); |
---|
668 | |
---|
669 | fp = fopen_for_write (plot_filename); |
---|
670 | |
---|
671 | fprintf (fp, "# Generated with:\n"); |
---|
672 | fprintf (fp, "#"); |
---|
673 | for (i = 0; i < argc; i++) |
---|
674 | fprintf (fp, " %s", argv[i]); |
---|
675 | fprintf (fp, "\n"); |
---|
676 | fprintf (fp, "\n"); |
---|
677 | |
---|
678 | /* Putting the key at the top left is usually good, and you can change it |
---|
679 | interactively if it's not. */ |
---|
680 | fprintf (fp, "set key left\n"); |
---|
681 | |
---|
682 | /* designed to make it possible to see crossovers easily */ |
---|
683 | fprintf (fp, "set data style linespoints\n"); |
---|
684 | |
---|
685 | fprintf (fp, "plot "); |
---|
686 | for (i = 0; i < num_choices; i++) |
---|
687 | { |
---|
688 | fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); |
---|
689 | fprintf (fp, " title \"%s\"", choice[i].name); |
---|
690 | |
---|
691 | if (i != num_choices-1) |
---|
692 | fprintf (fp, ", \\"); |
---|
693 | fprintf (fp, "\n"); |
---|
694 | } |
---|
695 | |
---|
696 | fprintf (fp, "load \"-\"\n"); |
---|
697 | fclose_written (fp, plot_filename); |
---|
698 | |
---|
699 | fp = fopen_for_write (data_filename); |
---|
700 | |
---|
701 | /* Unbuffered so you can see where the program was up to if it crashes or |
---|
702 | you kill it. */ |
---|
703 | setbuf (fp, NULL); |
---|
704 | |
---|
705 | run_all (fp); |
---|
706 | fclose_written (fp, data_filename); |
---|
707 | } |
---|
708 | |
---|
709 | |
---|
710 | /* Return a limb with n many one bits (starting from the least significant) */ |
---|
711 | |
---|
712 | #define LIMB_ONES(n) \ |
---|
713 | ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX \ |
---|
714 | : (n) == 0 ? CNST_LIMB(0) \ |
---|
715 | : (CNST_LIMB(1) << (n)) - 1) |
---|
716 | |
---|
717 | mp_limb_t |
---|
718 | r_string (const char *s) |
---|
719 | { |
---|
720 | const char *s_orig = s; |
---|
721 | long n; |
---|
722 | |
---|
723 | if (strcmp (s, "aas") == 0) |
---|
724 | return GMP_NUMB_0xAA; |
---|
725 | |
---|
726 | { |
---|
727 | mpz_t z; |
---|
728 | mp_limb_t l; |
---|
729 | int set, siz; |
---|
730 | |
---|
731 | mpz_init (z); |
---|
732 | set = mpz_set_str (z, s, 0); |
---|
733 | siz = SIZ(z); |
---|
734 | l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); |
---|
735 | mpz_clear (z); |
---|
736 | if (set == 0) |
---|
737 | { |
---|
738 | if (siz > 1 || siz < -1) |
---|
739 | printf ("Warning, r parameter %s truncated to %d bits\n", |
---|
740 | s_orig, BITS_PER_MP_LIMB); |
---|
741 | return l; |
---|
742 | } |
---|
743 | } |
---|
744 | |
---|
745 | if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) |
---|
746 | n = strtoul (s+2, (char **) &s, 16); |
---|
747 | else |
---|
748 | n = strtol (s, (char **) &s, 10); |
---|
749 | |
---|
750 | if (strcmp (s, "bits") == 0) |
---|
751 | { |
---|
752 | mp_limb_t l; |
---|
753 | if (n > BITS_PER_MP_LIMB) |
---|
754 | { |
---|
755 | fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", |
---|
756 | n, BITS_PER_MP_LIMB); |
---|
757 | exit (1); |
---|
758 | } |
---|
759 | mpn_random (&l, 1); |
---|
760 | return (l | (1 << (n-1))) & LIMB_ONES(n); |
---|
761 | } |
---|
762 | else if (strcmp (s, "ones") == 0) |
---|
763 | { |
---|
764 | if (n > BITS_PER_MP_LIMB) |
---|
765 | { |
---|
766 | fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", |
---|
767 | n, BITS_PER_MP_LIMB); |
---|
768 | exit (1); |
---|
769 | } |
---|
770 | return LIMB_ONES (n); |
---|
771 | } |
---|
772 | else if (*s != '\0') |
---|
773 | { |
---|
774 | fprintf (stderr, "invalid r parameter: %s\n", s_orig); |
---|
775 | exit (1); |
---|
776 | } |
---|
777 | |
---|
778 | return n; |
---|
779 | } |
---|
780 | |
---|
781 | |
---|
782 | void |
---|
783 | routine_find (struct choice_t *c, const char *s_orig) |
---|
784 | { |
---|
785 | const char *s; |
---|
786 | int i; |
---|
787 | size_t nlen; |
---|
788 | |
---|
789 | c->name = s_orig; |
---|
790 | s = strchr (s_orig, '*'); |
---|
791 | if (s != NULL) |
---|
792 | { |
---|
793 | c->scale = atof(s_orig); |
---|
794 | s++; |
---|
795 | } |
---|
796 | else |
---|
797 | { |
---|
798 | c->scale = 1.0; |
---|
799 | s = s_orig; |
---|
800 | } |
---|
801 | |
---|
802 | for (i = 0; i < numberof (routine); i++) |
---|
803 | { |
---|
804 | nlen = strlen (routine[i].name); |
---|
805 | if (memcmp (s, routine[i].name, nlen) != 0) |
---|
806 | continue; |
---|
807 | |
---|
808 | if (s[nlen] == '.') |
---|
809 | { |
---|
810 | /* match, with a .r parameter */ |
---|
811 | |
---|
812 | if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) |
---|
813 | { |
---|
814 | fprintf (stderr, |
---|
815 | "Choice %s bad: doesn't take a \".<r>\" parameter\n", |
---|
816 | s_orig); |
---|
817 | exit (1); |
---|
818 | } |
---|
819 | |
---|
820 | c->p = &routine[i]; |
---|
821 | c->r = r_string (s + nlen + 1); |
---|
822 | return; |
---|
823 | } |
---|
824 | |
---|
825 | if (s[nlen] == '\0') |
---|
826 | { |
---|
827 | /* match, with no parameter */ |
---|
828 | |
---|
829 | if (routine[i].flag & FLAG_R) |
---|
830 | { |
---|
831 | fprintf (stderr, |
---|
832 | "Choice %s bad: needs a \".<r>\" parameter\n", |
---|
833 | s_orig); |
---|
834 | exit (1); |
---|
835 | } |
---|
836 | |
---|
837 | c->p = &routine[i]; |
---|
838 | c->r = 0; |
---|
839 | return; |
---|
840 | } |
---|
841 | } |
---|
842 | |
---|
843 | fprintf (stderr, "Choice %s unrecognised\n", s_orig); |
---|
844 | exit (1); |
---|
845 | } |
---|
846 | |
---|
847 | |
---|
848 | void |
---|
849 | usage (void) |
---|
850 | { |
---|
851 | int i; |
---|
852 | |
---|
853 | speed_time_init (); |
---|
854 | |
---|
855 | printf ("\ |
---|
856 | Usage: speed [-options] -s size <routine>...\n\ |
---|
857 | Measure the speed of some routines.\n\ |
---|
858 | Times are in seconds, accuracy is shown.\n\ |
---|
859 | \n\ |
---|
860 | -p num set precision as number of time units each routine must run\n\ |
---|
861 | -s size[-end][,size[-end]]... sizes to measure\n\ |
---|
862 | single sizes or ranges, sep with comma or use multiple -s\n\ |
---|
863 | -t step step through sizes by given amount\n\ |
---|
864 | -f factor step through sizes by given factor (eg. 1.05)\n\ |
---|
865 | -r show times as ratios of the first routine\n\ |
---|
866 | -d show times as difference from the first routine\n\ |
---|
867 | -D show times as difference from previous size shown\n\ |
---|
868 | -c show times in CPU cycles\n\ |
---|
869 | -C show times in cycles per limb\n\ |
---|
870 | -u print resource usage (memory) at end\n\ |
---|
871 | -P name output plot files \"name.gnuplot\" and \"name.data\"\n\ |
---|
872 | -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n\ |
---|
873 | -x, -y, -w, -W <align> specify data alignments, sources and dests\n\ |
---|
874 | -o addrs print addresses of data blocks\n\ |
---|
875 | \n\ |
---|
876 | If both -t and -f are used, it means step by the factor or the step, whichever\n\ |
---|
877 | is greater.\n\ |
---|
878 | If both -C and -D are used, it means cycles per however many limbs between a\n\ |
---|
879 | size and the previous size.\n\ |
---|
880 | \n\ |
---|
881 | After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\ |
---|
882 | \"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\ |
---|
883 | a log/log plot).\n\ |
---|
884 | \"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\ |
---|
885 | when viewing more than one routine, it means same axis scales for all data).\n\ |
---|
886 | \n\ |
---|
887 | The available routines are as follows.\n\ |
---|
888 | \n\ |
---|
889 | "); |
---|
890 | |
---|
891 | for (i = 0; i < numberof (routine); i++) |
---|
892 | { |
---|
893 | if (routine[i].flag & FLAG_R) |
---|
894 | printf ("\t%s.r\n", routine[i].name); |
---|
895 | else if (routine[i].flag & FLAG_R_OPTIONAL) |
---|
896 | printf ("\t%s (optional .r)\n", routine[i].name); |
---|
897 | else |
---|
898 | printf ("\t%s\n", routine[i].name); |
---|
899 | } |
---|
900 | |
---|
901 | printf ("\n\ |
---|
902 | Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\ |
---|
903 | r should be in decimal, or use 0xN for hexadecimal.\n\ |
---|
904 | \n\ |
---|
905 | Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n\ |
---|
906 | N one bits, or \"aas\" for 0xAA..AA.\n\ |
---|
907 | \n\ |
---|
908 | Times for sizes out of the range accepted by a routine are shown as 0.\n\ |
---|
909 | The fastest routine at each size is marked with a # (free form output only).\n\ |
---|
910 | \n\ |
---|
911 | %s\ |
---|
912 | \n\ |
---|
913 | Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\ |
---|
914 | Quickplot home page http://www.kachinatech.com/~quickplot\n\ |
---|
915 | ", speed_time_string); |
---|
916 | } |
---|
917 | |
---|
918 | int |
---|
919 | main (int argc, char *argv[]) |
---|
920 | { |
---|
921 | int i; |
---|
922 | int opt; |
---|
923 | |
---|
924 | /* Unbuffered so output goes straight out when directed to a pipe or file |
---|
925 | and isn't lost on killing the program half way. */ |
---|
926 | setbuf (stdout, NULL); |
---|
927 | |
---|
928 | for (;;) |
---|
929 | { |
---|
930 | opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); |
---|
931 | if (opt == EOF) |
---|
932 | break; |
---|
933 | |
---|
934 | switch (opt) { |
---|
935 | case 'a': |
---|
936 | if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; |
---|
937 | else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; |
---|
938 | else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; |
---|
939 | else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; |
---|
940 | else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; |
---|
941 | else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; |
---|
942 | else |
---|
943 | { |
---|
944 | fprintf (stderr, "unrecognised data option: %s\n", optarg); |
---|
945 | exit (1); |
---|
946 | } |
---|
947 | break; |
---|
948 | case 'C': |
---|
949 | if (option_unit != UNIT_SECONDS) goto bad_unit; |
---|
950 | option_unit = UNIT_CYCLESPERLIMB; |
---|
951 | break; |
---|
952 | case 'c': |
---|
953 | if (option_unit != UNIT_SECONDS) |
---|
954 | { |
---|
955 | bad_unit: |
---|
956 | fprintf (stderr, "cannot use more than one of -c, -C\n"); |
---|
957 | exit (1); |
---|
958 | } |
---|
959 | option_unit = UNIT_CYCLES; |
---|
960 | break; |
---|
961 | case 'D': |
---|
962 | if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; |
---|
963 | option_cmp = CMP_DIFFPREV; |
---|
964 | break; |
---|
965 | case 'd': |
---|
966 | if (option_cmp != CMP_ABSOLUTE) |
---|
967 | { |
---|
968 | bad_cmp: |
---|
969 | fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); |
---|
970 | exit (1); |
---|
971 | } |
---|
972 | option_cmp = CMP_DIFFERENCE; |
---|
973 | break; |
---|
974 | case 'E': |
---|
975 | option_square = 1; |
---|
976 | break; |
---|
977 | case 'F': |
---|
978 | option_square = 2; |
---|
979 | break; |
---|
980 | case 'f': |
---|
981 | option_factor = atof (optarg); |
---|
982 | if (option_factor <= 1.0) |
---|
983 | { |
---|
984 | fprintf (stderr, "-f factor must be > 1.0\n"); |
---|
985 | exit (1); |
---|
986 | } |
---|
987 | break; |
---|
988 | case 'o': |
---|
989 | speed_option_set (optarg); |
---|
990 | break; |
---|
991 | case 'P': |
---|
992 | option_gnuplot = 1; |
---|
993 | option_gnuplot_basename = optarg; |
---|
994 | break; |
---|
995 | case 'p': |
---|
996 | speed_precision = atoi (optarg); |
---|
997 | break; |
---|
998 | case 'R': |
---|
999 | option_seed = time (NULL); |
---|
1000 | break; |
---|
1001 | case 'r': |
---|
1002 | if (option_cmp != CMP_ABSOLUTE) |
---|
1003 | goto bad_cmp; |
---|
1004 | option_cmp = CMP_RATIO; |
---|
1005 | break; |
---|
1006 | case 's': |
---|
1007 | { |
---|
1008 | char *s; |
---|
1009 | for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) |
---|
1010 | { |
---|
1011 | if (size_num == size_allocnum) |
---|
1012 | { |
---|
1013 | size_array = (struct size_array_t *) |
---|
1014 | __gmp_allocate_or_reallocate |
---|
1015 | (size_array, |
---|
1016 | size_allocnum * sizeof(size_array[0]), |
---|
1017 | (size_allocnum+10) * sizeof(size_array[0])); |
---|
1018 | size_allocnum += 10; |
---|
1019 | } |
---|
1020 | if (sscanf (s, "%ld-%ld", |
---|
1021 | &size_array[size_num].start, |
---|
1022 | &size_array[size_num].end) != 2) |
---|
1023 | { |
---|
1024 | size_array[size_num].start = size_array[size_num].end |
---|
1025 | = atol (s); |
---|
1026 | } |
---|
1027 | |
---|
1028 | if (size_array[size_num].start < 0 |
---|
1029 | || size_array[size_num].end < 0 |
---|
1030 | || size_array[size_num].start > size_array[size_num].end) |
---|
1031 | { |
---|
1032 | fprintf (stderr, "invalid size parameter: %s\n", s); |
---|
1033 | exit (1); |
---|
1034 | } |
---|
1035 | |
---|
1036 | size_num++; |
---|
1037 | } |
---|
1038 | } |
---|
1039 | break; |
---|
1040 | case 't': |
---|
1041 | option_step = atol (optarg); |
---|
1042 | if (option_step < 1) |
---|
1043 | { |
---|
1044 | fprintf (stderr, "-t step must be >= 1\n"); |
---|
1045 | exit (1); |
---|
1046 | } |
---|
1047 | break; |
---|
1048 | case 'u': |
---|
1049 | option_resource_usage = 1; |
---|
1050 | break; |
---|
1051 | case 'z': |
---|
1052 | sp.cache = 1; |
---|
1053 | break; |
---|
1054 | case 'x': |
---|
1055 | sp.align_xp = atol (optarg); |
---|
1056 | break; |
---|
1057 | case 'y': |
---|
1058 | sp.align_yp = atol (optarg); |
---|
1059 | break; |
---|
1060 | case 'w': |
---|
1061 | sp.align_wp = atol (optarg); |
---|
1062 | break; |
---|
1063 | case 'W': |
---|
1064 | sp.align_wp2 = atol (optarg); |
---|
1065 | break; |
---|
1066 | case '?': |
---|
1067 | exit(1); |
---|
1068 | } |
---|
1069 | } |
---|
1070 | |
---|
1071 | if (optind >= argc) |
---|
1072 | { |
---|
1073 | usage (); |
---|
1074 | exit (1); |
---|
1075 | } |
---|
1076 | |
---|
1077 | if (size_num == 0) |
---|
1078 | { |
---|
1079 | fprintf (stderr, "-s <size> must be specified\n"); |
---|
1080 | exit (1); |
---|
1081 | } |
---|
1082 | |
---|
1083 | gmp_randseed_ui (RANDS, option_seed); |
---|
1084 | |
---|
1085 | choice = (struct choice_t *) (*__gmp_allocate_func) |
---|
1086 | ((argc - optind) * sizeof(choice[0])); |
---|
1087 | for ( ; optind < argc; optind++) |
---|
1088 | { |
---|
1089 | struct choice_t c; |
---|
1090 | routine_find (&c, argv[optind]); |
---|
1091 | choice[num_choices] = c; |
---|
1092 | num_choices++; |
---|
1093 | } |
---|
1094 | |
---|
1095 | if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && |
---|
1096 | num_choices < 2) |
---|
1097 | { |
---|
1098 | fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); |
---|
1099 | } |
---|
1100 | |
---|
1101 | speed_time_init (); |
---|
1102 | if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) |
---|
1103 | speed_cycletime_need_cycles (); |
---|
1104 | else |
---|
1105 | speed_cycletime_need_seconds (); |
---|
1106 | |
---|
1107 | if (option_gnuplot) |
---|
1108 | { |
---|
1109 | run_gnuplot (argc, argv); |
---|
1110 | } |
---|
1111 | else |
---|
1112 | { |
---|
1113 | if (option_unit == UNIT_SECONDS) |
---|
1114 | printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); |
---|
1115 | else |
---|
1116 | printf ("overhead %.2f cycles", |
---|
1117 | speed_measure (speed_noop, NULL) / speed_cycletime); |
---|
1118 | printf (", precision %d units of %.2e secs", |
---|
1119 | speed_precision, speed_unittime); |
---|
1120 | |
---|
1121 | if (speed_cycletime == 1.0 || speed_cycletime == 0.0) |
---|
1122 | printf (", CPU freq unknown\n"); |
---|
1123 | else |
---|
1124 | printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); |
---|
1125 | |
---|
1126 | printf (" "); |
---|
1127 | for (i = 0; i < num_choices; i++) |
---|
1128 | printf (" %*s", COLUMN_WIDTH, choice[i].name); |
---|
1129 | printf ("\n"); |
---|
1130 | |
---|
1131 | run_all (stdout); |
---|
1132 | } |
---|
1133 | |
---|
1134 | if (option_resource_usage) |
---|
1135 | { |
---|
1136 | #if HAVE_GETRUSAGE |
---|
1137 | { |
---|
1138 | /* This doesn't give data sizes on linux 2.0.x, only utime. */ |
---|
1139 | struct rusage r; |
---|
1140 | if (getrusage (RUSAGE_SELF, &r) != 0) |
---|
1141 | perror ("getrusage"); |
---|
1142 | else |
---|
1143 | printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", |
---|
1144 | r.ru_utime.tv_sec, r.ru_utime.tv_usec, |
---|
1145 | r.ru_idrss, r.ru_isrss, r.ru_ixrss); |
---|
1146 | } |
---|
1147 | #else |
---|
1148 | printf ("getrusage() not available\n"); |
---|
1149 | #endif |
---|
1150 | |
---|
1151 | /* Linux kernel. */ |
---|
1152 | { |
---|
1153 | char buf[128]; |
---|
1154 | sprintf (buf, "/proc/%d/status", getpid()); |
---|
1155 | if (access (buf, R_OK) == 0) |
---|
1156 | { |
---|
1157 | sprintf (buf, "cat /proc/%d/status", getpid()); |
---|
1158 | system (buf); |
---|
1159 | } |
---|
1160 | |
---|
1161 | } |
---|
1162 | } |
---|
1163 | |
---|
1164 | return 0; |
---|
1165 | } |
---|
1166 | |
---|
1167 | |
---|