source: trunk/third/gmp/tune/speed.c @ 18191

Revision 18191, 33.7 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18190, which included commits to RCS files with non-trunk default branches.
Line 
1/* Speed measuring program.
2
3Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of the GNU Lesser General Public License as published by
9the Free Software Foundation; either version 2.1 of the License, or (at your
10option) any later version.
11
12The GNU MP Library is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15License for more details.
16
17You should have received a copy of the GNU Lesser General Public License
18along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20MA 02111-1307, USA. */
21
22/* Usage message is in the code below, run with no arguments to print it.
23   See README for interesting applications.
24
25   To add a new routine foo(), create a speed_foo() function in the style of
26   the existing ones and add an entry in the routine[] array.  Put FLAG_R if
27   speed_foo() wants an "r" parameter.
28
29   The routines don't have help messages or descriptions, but most have
30   suggestive names.  See the source code for full details.
31
32*/
33
34#include "config.h"
35
36#include <limits.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40
41#if HAVE_UNISTD_H
42#include <unistd.h>  /* for getpid, R_OK */
43#endif
44
45#if TIME_WITH_SYS_TIME
46# include <sys/time.h>  /* for struct timeval */
47# include <time.h>
48#else
49# if HAVE_SYS_TIME_H
50#  include <sys/time.h>
51# else
52#  include <time.h>
53# endif
54#endif
55
56#if HAVE_SYS_RESOURCE_H
57#include <sys/resource.h>  /* for getrusage() */
58#endif
59
60
61#include "gmp.h"
62#include "gmp-impl.h"
63#include "longlong.h"  /* for the benefit of speed-many.c */
64#include "tests.h"
65#include "speed.h"
66
67
68#if !HAVE_DECL_OPTARG
69extern char *optarg;
70extern int optind, opterr;
71#endif
72
73#if !HAVE_STRTOUL
74#define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
75#endif
76
77#ifdef SPEED_EXTRA_PROTOS
78SPEED_EXTRA_PROTOS
79#endif
80#ifdef SPEED_EXTRA_PROTOS2
81SPEED_EXTRA_PROTOS2
82#endif
83
84
85#define MPN_FILL(ptr, size, n)          \
86  do {                                  \
87    mp_size_t __i;                      \
88    ASSERT ((size) >= 0);               \
89    for (__i = 0; __i < (size); __i++)  \
90      (ptr)[__i] = (n);                 \
91  } while (0)
92
93
94#if BITS_PER_MP_LIMB == 32
95#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
96#endif
97#if BITS_PER_MP_LIMB == 64
98#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
99#endif
100
101
102#define CMP_ABSOLUTE     1
103#define CMP_RATIO        2
104#define CMP_DIFFERENCE   3
105#define CMP_DIFFPREV     4
106int  option_cmp = CMP_ABSOLUTE;
107
108#define UNIT_SECONDS        1
109#define UNIT_CYCLES         2
110#define UNIT_CYCLESPERLIMB  3
111int  option_unit = UNIT_SECONDS;
112
113#define DATA_RANDOM   1
114#define DATA_RANDOM2  2
115#define DATA_ZEROS    3
116#define DATA_AAS      4
117#define DATA_FFS      5
118#define DATA_2FD      6
119int  option_data = DATA_RANDOM;
120
121int        option_square = 0;
122double     option_factor = 0.0;
123mp_size_t  option_step = 1;
124int        option_gnuplot = 0;
125char      *option_gnuplot_basename;
126struct size_array_t {
127  mp_size_t start, end;
128} *size_array = NULL;
129mp_size_t  size_num = 0;
130mp_size_t  size_allocnum = 0;
131int        option_resource_usage = 0;
132long       option_seed = 123456789;
133
134struct speed_params  sp;
135
136#define COLUMN_WIDTH  13  /* for the free-form output */
137
138#define FLAG_R            (1<<0)  /* require ".r" */
139#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
140#define FLAG_RSIZE        (1<<2)
141#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
142
143const struct routine_t {
144  /* constants */
145  const char        *name;
146  speed_function_t  fun;
147  int               flag;
148 
149} routine[] = {
150
151  { "noop",              speed_noop                 },
152  { "noop_wxs",          speed_noop_wxs             },
153  { "noop_wxys",         speed_noop_wxys            },
154
155  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
156  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
157
158  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
159  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
160  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
161  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
162#if HAVE_NATIVE_mpn_mul_2
163  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R },
164#endif
165
166  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
167  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
168#if HAVE_NATIVE_mpn_divrem_1c
169  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
170  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
171#endif
172  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
173#if HAVE_NATIVE_mpn_mod_1c
174  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
175#endif
176  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
177  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
178  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
179
180  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
181  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
182  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
183  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
184  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
185  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },
186
187  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
188  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
189  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
190
191  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
192  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
193
194#if HAVE_NATIVE_mpn_modexact_1c_odd
195  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
196#endif
197  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
198
199  { "mpn_dc_tdiv_qr",       speed_mpn_dc_tdiv_qr       },
200  { "mpn_dc_divrem_n",      speed_mpn_dc_divrem_n      },
201  { "mpn_dc_divrem_sb",     speed_mpn_dc_divrem_sb     },
202  { "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div },
203  { "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv },
204
205  { "mpn_sb_divrem_m3",     speed_mpn_sb_divrem_m3     },
206  { "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div },
207  { "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv },
208
209  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
210  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
211
212  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
213  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
214  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
215  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
216  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
217  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
218  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
219  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
220  { "mpn_com_n",         speed_mpn_com_n            },
221
222  { "mpn_popcount",      speed_mpn_popcount         },
223  { "mpn_hamdist",       speed_mpn_hamdist          },
224
225  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
226  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
227
228  { "mpn_gcd",           speed_mpn_gcd                    },
229  { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
230  { "find_a",            speed_find_a,        FLAG_NODATA },
231#if HAVE_NATIVE_mpn_gcd_finda
232  { "mpn_gcd_finda",     speed_mpn_gcd_finda, FLAG_NODATA },
233#endif
234
235  { "mpn_gcdext",            speed_mpn_gcdext            },
236  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
237  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
238  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
239  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
240
241  { "mpz_jacobi",        speed_mpz_jacobi           },
242  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
243  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
244  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
245  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
246
247  { "mpn_mul_basecase",  speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
248  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
249#if HAVE_NATIVE_mpn_sqr_diagonal
250  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
251#endif
252
253  { "mpn_mul_n",         speed_mpn_mul_n            },
254  { "mpn_sqr_n",         speed_mpn_sqr_n            },
255
256  { "mpn_kara_mul_n",    speed_mpn_kara_mul_n       },
257  { "mpn_kara_sqr_n",    speed_mpn_kara_sqr_n       },
258  { "mpn_toom3_mul_n",   speed_mpn_toom3_mul_n      },
259  { "mpn_toom3_sqr_n",   speed_mpn_toom3_sqr_n      },
260  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
261  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
262
263  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
264  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
265
266  { "mpn_toom3_mul_n_mpn",   speed_mpn_toom3_mul_n_mpn   },
267  { "mpn_toom3_mul_n_open",  speed_mpn_toom3_mul_n_open  },
268  { "mpn_toom3_sqr_n_mpn",   speed_mpn_toom3_sqr_n_mpn   },
269  { "mpn_toom3_sqr_n_open",  speed_mpn_toom3_sqr_n_open  },
270
271  { "mpn_get_str",       speed_mpn_get_str,  FLAG_R_OPTIONAL },
272
273  { "mpn_set_str",          speed_mpn_set_str,          FLAG_R_OPTIONAL },
274  { "mpn_set_str_basecase", speed_mpn_set_str_basecase, FLAG_R_OPTIONAL },
275  { "mpn_set_str_subquad",  speed_mpn_set_str_subquad,  FLAG_R_OPTIONAL },
276
277  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
278
279  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
280  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
281  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
282  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
283  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },
284
285  { "mpz_add",           speed_mpz_add              },
286  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
287  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
288  { "mpz_powm",          speed_mpz_powm             },
289  { "mpz_powm_mod",      speed_mpz_powm_mod         },
290  { "mpz_powm_redc",     speed_mpz_powm_redc        },
291  { "mpz_powm_ui",       speed_mpz_powm_ui          },
292
293  { "mpz_mod",           speed_mpz_mod              },
294  { "redc",              speed_redc                 },
295
296  { "MPN_COPY",          speed_MPN_COPY             },
297  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
298  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
299  { "memcpy",            speed_memcpy               },
300#if HAVE_NATIVE_mpn_copyi
301  { "mpn_copyi",         speed_mpn_copyi            },
302#endif
303#if HAVE_NATIVE_mpn_copyd
304  { "mpn_copyd",         speed_mpn_copyd            },
305#endif
306
307  { "MPN_ZERO",          speed_MPN_ZERO             },
308
309  { "modlimb_invert",       speed_modlimb_invert,       FLAG_NODATA },
310  { "modlimb_invert_mul1",  speed_modlimb_invert_mul1,  FLAG_NODATA },
311  { "modlimb_invert_loop",  speed_modlimb_invert_loop,  FLAG_NODATA },
312  { "modlimb_invert_cond",  speed_modlimb_invert_cond,  FLAG_NODATA },
313  { "modlimb_invert_arith", speed_modlimb_invert_arith, FLAG_NODATA },
314
315  { "malloc_free",                  speed_malloc_free                  },
316  { "malloc_realloc_free",          speed_malloc_realloc_free          },
317  { "gmp_allocate_free",            speed_gmp_allocate_free            },
318  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
319  { "mpz_init_clear",               speed_mpz_init_clear               },
320  { "mpq_init_clear",               speed_mpq_init_clear               },
321  { "mpf_init_clear",               speed_mpf_init_clear               },
322  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },
323
324  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
325#if HAVE_NATIVE_mpn_umul_ppmm
326  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
327#endif
328
329  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
330  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
331
332  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
333  { "udiv_qrnnd_preinv",      speed_udiv_qrnnd_preinv,      FLAG_R_OPTIONAL },
334  { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
335  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
336#if HAVE_NATIVE_mpn_udiv_qrnnd
337  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
338#endif
339  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },
340
341  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
342  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },
343
344#ifdef SPEED_EXTRA_ROUTINES
345  SPEED_EXTRA_ROUTINES
346#endif
347#ifdef SPEED_EXTRA_ROUTINES2
348  SPEED_EXTRA_ROUTINES2
349#endif
350};
351
352
353struct choice_t {
354  const struct routine_t  *p;
355  mp_limb_t               r;
356  double                  scale;
357  double                  time;
358  int                     no_time;
359  double                  prev_time;
360  const char              *name;
361};
362struct choice_t  *choice;
363int  num_choices = 0;
364
365
366void
367data_fill (mp_ptr ptr, mp_size_t size)
368{
369  switch (option_data) {
370  case DATA_RANDOM:
371    mpn_random (ptr, size);
372    break;
373  case DATA_RANDOM2:
374    mpn_random2 (ptr, size);
375    break;
376  case DATA_ZEROS:
377    MPN_ZERO (ptr, size);
378    break;
379  case DATA_AAS:
380    MPN_FILL (ptr, size, GMP_NUMB_0xAA);
381    break;
382  case DATA_FFS:
383    MPN_FILL (ptr, size, GMP_NUMB_MAX);
384    break;
385  case DATA_2FD:
386    MPN_FILL (ptr, size, GMP_NUMB_MAX);
387    ptr[0] -= 2;
388    break;
389  default:
390    abort();
391    /*NOTREACHED*/
392  }
393}
394
395/* The code here handling the various combinations of output options isn't
396   too attractive, but it works and is fairly clean.  */
397
398#define SIZE_TO_DIVISOR(n)              \
399  (option_square == 1 ? (n)*(n)         \
400  : option_square == 2 ? (n)*((n)+1)/2  \
401  : (n))
402
403void
404run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
405{
406  const char  *first_open_fastest, *first_open_notfastest, *first_close;
407  int         i, fastest, want_data;
408  double      fastest_time;
409  TMP_DECL (marker);
410
411  TMP_MARK (marker);
412
413  /* allocate data, unless all routines are NODATA */
414  want_data = 0;
415  for (i = 0; i < num_choices; i++)
416    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
417
418  if (want_data)
419    {
420      sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
421      sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
422
423      data_fill (s->xp, s->size);
424      data_fill (s->yp, s->size);
425    }
426  else
427    {
428      sp.xp = NULL;
429      sp.yp = NULL;
430    }
431
432  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
433    {
434      first_open_fastest = "(#";
435      first_open_notfastest = " (";
436      first_close = ")";
437    }
438  else
439    {
440      first_open_fastest = "#";
441      first_open_notfastest = " ";
442      first_close = "";
443    }
444
445  fastest = -1;
446  fastest_time = -1.0;
447  for (i = 0; i < num_choices; i++)
448    {
449      s->r = choice[i].r;
450      choice[i].time = speed_measure (choice[i].p->fun, s);
451      choice[i].no_time = (choice[i].time == -1.0);
452      choice[i].time *= choice[i].scale;
453
454      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
455         is before any differences.  */
456      {
457        double     t;
458        t = choice[i].time;
459        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
460          {
461            if (choice[i].prev_time == -1.0)
462              choice[i].no_time = 1;
463            else
464              choice[i].time = choice[i].time - choice[i].prev_time;
465          }
466        choice[i].prev_time = t;
467      }
468
469      if (choice[i].no_time)
470        continue;
471
472      /* Look for the fastest after CMP_DIFFPREV has been applied, but
473         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
474         if there's more than one routine.  */
475      if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
476        {
477          fastest = i;
478          fastest_time = choice[i].time;
479        }
480
481      if (option_cmp == CMP_DIFFPREV)
482        {
483          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
484          if (option_unit == UNIT_CYCLES)
485            choice[i].time /= speed_cycletime;
486          else if (option_unit == UNIT_CYCLESPERLIMB)
487            {
488              if (prev_size == -1)
489                choice[i].time /= speed_cycletime;
490              else
491                choice[i].time /=  (speed_cycletime
492                                    * (SIZE_TO_DIVISOR(s->size)
493                                       - SIZE_TO_DIVISOR(prev_size)));
494            }
495        }
496      else
497        {
498          if (option_unit == UNIT_CYCLES)
499            choice[i].time /= speed_cycletime;
500          else if (option_unit == UNIT_CYCLESPERLIMB)
501            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
502         
503          if (option_cmp == CMP_RATIO && i > 0)
504            {
505              /* A ratio isn't affected by the units chosen. */
506              if (choice[0].no_time || choice[0].time == 0.0)
507                choice[i].no_time = 1;
508              else
509                choice[i].time /= choice[0].time;
510            }
511          else if (option_cmp == CMP_DIFFERENCE && i > 0)
512            {
513              if (choice[0].no_time)
514                {
515                  choice[i].no_time = 1;
516                  continue;
517                }
518              choice[i].time -= choice[0].time;
519            }
520        }
521    }
522
523  if (option_gnuplot)
524    {
525      /* In CMP_DIFFPREV, don't print anything for the first size, start
526         with the second where an actual difference is available.
527
528         In CMP_RATIO, print the first column as 1.0.
529
530         The 9 decimals printed is much more than the expected precision of
531         the measurements actually. */
532
533      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
534        {     
535          fprintf (fp, "%-6ld ", s->size);
536          for (i = 0; i < num_choices; i++)
537            fprintf (fp, "  %.9e",
538                     choice[i].no_time ? 0.0
539                     : (option_cmp == CMP_RATIO && i == 0) ? 1.0
540                     : choice[i].time);
541          fprintf (fp, "\n");
542        }
543    }
544  else
545    {
546      fprintf (fp, "%-6ld ", s->size);
547      for (i = 0; i < num_choices; i++)
548        {
549          char  buf[128];
550          int   decimals;
551
552          if (choice[i].no_time)
553            decimals = 0, choice[i].time = 0.0;
554          else if (option_unit == UNIT_CYCLESPERLIMB
555                   || (option_cmp == CMP_RATIO && i > 0))
556            decimals = 4;
557          else if (option_unit == UNIT_CYCLES)
558            decimals = 2;
559          else
560            decimals = 9;
561
562          sprintf (buf, "%s%.*f%s",
563                   i == fastest ? first_open_fastest : first_open_notfastest,
564                   decimals, choice[i].time, first_close);
565          fprintf (fp, " %*s", COLUMN_WIDTH, buf);
566        }
567      fprintf (fp, "\n");
568    }
569
570  TMP_FREE (marker);
571}
572
573void
574run_all (FILE *fp)
575{
576  mp_size_t  prev_size;
577  int        i;
578  TMP_DECL (marker);
579
580  TMP_MARK (marker);
581  sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
582  sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
583
584  data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
585  data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
586
587  for (i = 0; i < size_num; i++)
588    {
589      sp.size = size_array[i].start;
590      prev_size = -1;
591      for (;;)
592        {
593          mp_size_t  step;
594
595          if (option_data == DATA_2FD && sp.size >= 2)
596            sp.xp[sp.size-1] = 2;
597
598          run_one (fp, &sp, prev_size);
599          prev_size = sp.size;
600
601          if (option_data == DATA_2FD && sp.size >= 2)
602            sp.xp[sp.size-1] = MP_LIMB_T_MAX;
603
604          if (option_factor != 0.0)
605            {
606              step = (mp_size_t) (sp.size * option_factor - sp.size);
607              if (step < 1)
608                step = 1;
609            }
610          else
611            step = 1;
612          if (step < option_step)
613            step = option_step;
614
615          sp.size += step;
616          if (sp.size > size_array[i].end)
617            break;
618        }
619    }
620
621  TMP_FREE (marker);
622}
623
624
625FILE *
626fopen_for_write (const char *filename)
627{
628  FILE  *fp;
629  if ((fp = fopen (filename, "w")) == NULL)
630    {
631      fprintf (stderr, "Cannot create %s\n", filename);
632      exit(1);
633    }
634  return fp;
635}
636     
637void
638fclose_written (FILE *fp, const char *filename)
639{
640  int  err;
641
642  err = ferror (fp);
643  err |= fclose (fp);
644
645  if (err)
646    {
647      fprintf (stderr, "Error writing %s\n", filename);
648      exit(1);
649    }
650}
651
652
653void
654run_gnuplot (int argc, char *argv[])
655{
656  char  *plot_filename;
657  char  *data_filename;
658  FILE  *fp;
659  int   i;
660     
661  plot_filename = (char *) (*__gmp_allocate_func)
662    (strlen (option_gnuplot_basename) + 20);
663  data_filename = (char *) (*__gmp_allocate_func)
664    (strlen (option_gnuplot_basename) + 20);
665     
666  sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
667  sprintf (data_filename, "%s.data",    option_gnuplot_basename);
668
669  fp = fopen_for_write (plot_filename);
670
671  fprintf (fp, "# Generated with:\n");
672  fprintf (fp, "#");
673  for (i = 0; i < argc; i++)
674    fprintf (fp, " %s", argv[i]);
675  fprintf (fp, "\n");
676  fprintf (fp, "\n");
677
678  /* Putting the key at the top left is usually good, and you can change it
679     interactively if it's not. */
680  fprintf (fp, "set key left\n");
681
682  /* designed to make it possible to see crossovers easily */
683  fprintf (fp, "set data style linespoints\n");
684
685  fprintf (fp, "plot ");
686  for (i = 0; i < num_choices; i++)
687    {
688      fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
689      fprintf (fp, " title \"%s\"", choice[i].name);
690
691      if (i != num_choices-1)
692        fprintf (fp, ", \\");
693      fprintf (fp, "\n");
694    }
695
696  fprintf (fp, "load \"-\"\n");
697  fclose_written (fp, plot_filename);
698
699  fp = fopen_for_write (data_filename);
700
701  /* Unbuffered so you can see where the program was up to if it crashes or
702     you kill it. */
703  setbuf (fp, NULL);
704
705  run_all (fp);
706  fclose_written (fp, data_filename);
707}
708
709
710/* Return a limb with n many one bits (starting from the least significant) */
711
712#define LIMB_ONES(n) \
713  ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX      \
714    : (n) == 0 ? CNST_LIMB(0)                   \
715    : (CNST_LIMB(1) << (n)) - 1)
716
717mp_limb_t
718r_string (const char *s)
719{
720  const char  *s_orig = s;
721  long        n;
722
723  if (strcmp (s, "aas") == 0)
724    return GMP_NUMB_0xAA;
725
726  {
727    mpz_t      z;
728    mp_limb_t  l;
729    int        set, siz;
730
731    mpz_init (z);
732    set = mpz_set_str (z, s, 0);
733    siz = SIZ(z);
734    l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
735    mpz_clear (z);
736    if (set == 0)
737      {
738        if (siz > 1 || siz < -1)
739          printf ("Warning, r parameter %s truncated to %d bits\n",
740                  s_orig, BITS_PER_MP_LIMB);
741        return l;
742      }
743  }
744
745  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
746    n = strtoul (s+2, (char **) &s, 16);
747  else
748    n = strtol (s, (char **) &s, 10);
749
750  if (strcmp (s, "bits") == 0)
751    {
752      mp_limb_t  l;
753      if (n > BITS_PER_MP_LIMB)
754        {
755          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
756                   n, BITS_PER_MP_LIMB);
757          exit (1);
758        }
759      mpn_random (&l, 1);
760      return (l | (1 << (n-1))) & LIMB_ONES(n);
761    }
762  else  if (strcmp (s, "ones") == 0)
763    {
764      if (n > BITS_PER_MP_LIMB)
765        {
766          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
767                   n, BITS_PER_MP_LIMB);
768          exit (1);
769        }
770      return LIMB_ONES (n);
771    }
772  else if (*s != '\0')
773    {
774      fprintf (stderr, "invalid r parameter: %s\n", s_orig);
775      exit (1);
776    }
777
778  return n;
779}
780
781
782void
783routine_find (struct choice_t *c, const char *s_orig)
784{
785  const char  *s;
786  int     i;
787  size_t  nlen;
788
789  c->name = s_orig;
790  s = strchr (s_orig, '*');
791  if (s != NULL)
792    {
793      c->scale = atof(s_orig);
794      s++;
795    }
796  else
797    {
798      c->scale = 1.0;
799      s = s_orig;
800    }
801
802  for (i = 0; i < numberof (routine); i++)
803    {
804      nlen = strlen (routine[i].name);
805      if (memcmp (s, routine[i].name, nlen) != 0)
806        continue;
807
808      if (s[nlen] == '.')
809        {
810          /* match, with a .r parameter */
811
812          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
813            {
814              fprintf (stderr,
815                       "Choice %s bad: doesn't take a \".<r>\" parameter\n",
816                       s_orig);
817              exit (1);
818            }
819
820          c->p = &routine[i];
821          c->r = r_string (s + nlen + 1);
822          return;
823        }
824
825      if (s[nlen] == '\0')
826        {
827          /* match, with no parameter */
828
829          if (routine[i].flag & FLAG_R)
830            {
831              fprintf (stderr,
832                       "Choice %s bad: needs a \".<r>\" parameter\n",
833                       s_orig);
834              exit (1);
835            }
836
837          c->p = &routine[i];
838          c->r = 0;
839          return;
840        }
841    }
842
843  fprintf (stderr, "Choice %s unrecognised\n", s_orig);
844  exit (1);
845}
846
847
848void
849usage (void)
850{
851  int  i;
852 
853  speed_time_init ();
854
855  printf ("\
856Usage: speed [-options] -s size <routine>...\n\
857Measure the speed of some routines.\n\
858Times are in seconds, accuracy is shown.\n\
859\n\
860   -p num     set precision as number of time units each routine must run\n\
861   -s size[-end][,size[-end]]...   sizes to measure\n\
862              single sizes or ranges, sep with comma or use multiple -s\n\
863   -t step    step through sizes by given amount\n\
864   -f factor  step through sizes by given factor (eg. 1.05)\n\
865   -r         show times as ratios of the first routine\n\
866   -d         show times as difference from the first routine\n\
867   -D         show times as difference from previous size shown\n\
868   -c         show times in CPU cycles\n\
869   -C         show times in cycles per limb\n\
870   -u         print resource usage (memory) at end\n\
871   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n\
872   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n\
873   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n\
874   -o addrs   print addresses of data blocks\n\
875\n\
876If both -t and -f are used, it means step by the factor or the step, whichever\n\
877is greater.\n\
878If both -C and -D are used, it means cycles per however many limbs between a\n\
879size and the previous size.\n\
880\n\
881After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
882\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
883a log/log plot).\n\
884\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
885when viewing more than one routine, it means same axis scales for all data).\n\
886\n\
887The available routines are as follows.\n\
888\n\
889");
890
891  for (i = 0; i < numberof (routine); i++)
892    {
893      if (routine[i].flag & FLAG_R)
894        printf ("\t%s.r\n", routine[i].name);
895      else if (routine[i].flag & FLAG_R_OPTIONAL)
896        printf ("\t%s (optional .r)\n", routine[i].name);
897      else
898        printf ("\t%s\n", routine[i].name);
899    }
900     
901  printf ("\n\
902Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
903r should be in decimal, or use 0xN for hexadecimal.\n\
904\n\
905Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n\
906N one bits, or \"aas\" for 0xAA..AA.\n\
907\n\
908Times for sizes out of the range accepted by a routine are shown as 0.\n\
909The fastest routine at each size is marked with a # (free form output only).\n\
910\n\
911%s\
912\n\
913Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
914Quickplot home page http://www.kachinatech.com/~quickplot\n\
915", speed_time_string);
916}
917
918int
919main (int argc, char *argv[])
920{
921  int  i;
922  int  opt;
923
924  /* Unbuffered so output goes straight out when directed to a pipe or file
925     and isn't lost on killing the program half way.  */
926  setbuf (stdout, NULL);
927
928  for (;;)
929    {
930      opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
931      if (opt == EOF)
932        break;
933
934      switch (opt) {
935      case 'a':
936        if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
937        else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
938        else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
939        else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
940        else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
941        else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
942        else
943          {
944            fprintf (stderr, "unrecognised data option: %s\n", optarg);
945            exit (1);
946          }
947        break;
948      case 'C':
949        if (option_unit  != UNIT_SECONDS) goto bad_unit;
950        option_unit = UNIT_CYCLESPERLIMB;
951        break;
952      case 'c':
953        if (option_unit != UNIT_SECONDS)
954          {
955          bad_unit:
956            fprintf (stderr, "cannot use more than one of -c, -C\n");
957            exit (1);
958          }
959        option_unit = UNIT_CYCLES;
960        break;
961      case 'D':
962        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
963        option_cmp = CMP_DIFFPREV;
964        break;
965      case 'd':
966        if (option_cmp != CMP_ABSOLUTE)
967          {
968          bad_cmp:
969            fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
970            exit (1);
971          }
972        option_cmp = CMP_DIFFERENCE;
973        break;
974      case 'E':
975        option_square = 1;
976        break;
977      case 'F':
978        option_square = 2;
979        break;
980      case 'f':
981        option_factor = atof (optarg);
982        if (option_factor <= 1.0)
983          {
984            fprintf (stderr, "-f factor must be > 1.0\n");
985            exit (1);
986          }
987        break;
988      case 'o':
989        speed_option_set (optarg);
990        break;
991      case 'P':
992        option_gnuplot = 1;
993        option_gnuplot_basename = optarg;
994        break;
995      case 'p':
996        speed_precision = atoi (optarg);
997        break;
998      case 'R':
999        option_seed = time (NULL);
1000        break;
1001      case 'r':
1002        if (option_cmp != CMP_ABSOLUTE)
1003          goto bad_cmp;
1004        option_cmp = CMP_RATIO;
1005        break;
1006      case 's':
1007        {
1008          char  *s;
1009          for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1010            {
1011              if (size_num == size_allocnum)
1012                {
1013                  size_array = (struct size_array_t *)
1014                    __gmp_allocate_or_reallocate
1015                    (size_array,
1016                     size_allocnum * sizeof(size_array[0]),
1017                     (size_allocnum+10) * sizeof(size_array[0]));
1018                  size_allocnum += 10;
1019                }
1020              if (sscanf (s, "%ld-%ld",
1021                          &size_array[size_num].start,
1022                          &size_array[size_num].end) != 2)
1023                {
1024                  size_array[size_num].start = size_array[size_num].end
1025                    = atol (s);
1026                }
1027
1028              if (size_array[size_num].start < 0
1029                  || size_array[size_num].end < 0
1030                  || size_array[size_num].start > size_array[size_num].end)
1031                {
1032                  fprintf (stderr, "invalid size parameter: %s\n", s);
1033                  exit (1);
1034                }
1035
1036              size_num++;
1037            }
1038        }
1039        break;
1040      case 't':
1041        option_step = atol (optarg);
1042        if (option_step < 1)
1043          {
1044            fprintf (stderr, "-t step must be >= 1\n");
1045            exit (1);
1046          }
1047        break;
1048      case 'u':
1049        option_resource_usage = 1;
1050        break;
1051      case 'z':
1052        sp.cache = 1;
1053        break;
1054      case 'x':
1055        sp.align_xp = atol (optarg);
1056        break;
1057      case 'y':
1058        sp.align_yp = atol (optarg);
1059        break;
1060      case 'w':
1061        sp.align_wp = atol (optarg);
1062        break;
1063      case 'W':
1064        sp.align_wp2 = atol (optarg);
1065        break;
1066      case '?':
1067        exit(1);
1068      }
1069    }
1070
1071  if (optind >= argc)
1072    {
1073      usage ();
1074      exit (1);
1075    }
1076
1077  if (size_num == 0)
1078    {
1079      fprintf (stderr, "-s <size> must be specified\n");
1080      exit (1);
1081    }
1082
1083  gmp_randseed_ui (RANDS, option_seed);
1084
1085  choice = (struct choice_t *) (*__gmp_allocate_func)
1086    ((argc - optind) * sizeof(choice[0]));
1087  for ( ; optind < argc; optind++)
1088    {
1089      struct choice_t  c;
1090      routine_find (&c, argv[optind]);
1091      choice[num_choices] = c;
1092      num_choices++;
1093    }
1094 
1095  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1096      num_choices < 2)
1097    {
1098      fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1099    }
1100
1101  speed_time_init ();
1102  if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1103    speed_cycletime_need_cycles ();
1104  else
1105    speed_cycletime_need_seconds ();
1106
1107  if (option_gnuplot)
1108    {
1109      run_gnuplot (argc, argv);
1110    }
1111  else
1112    {
1113      if (option_unit == UNIT_SECONDS)
1114        printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1115      else
1116        printf ("overhead %.2f cycles",
1117                speed_measure (speed_noop, NULL) / speed_cycletime);
1118      printf (", precision %d units of %.2e secs",
1119              speed_precision, speed_unittime);
1120     
1121      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1122        printf (", CPU freq unknown\n");
1123      else
1124        printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1125
1126      printf ("       ");
1127      for (i = 0; i < num_choices; i++)
1128        printf (" %*s", COLUMN_WIDTH, choice[i].name);
1129      printf ("\n");
1130
1131      run_all (stdout);
1132    }
1133
1134  if (option_resource_usage)
1135    {
1136#if HAVE_GETRUSAGE
1137      {
1138        /* This doesn't give data sizes on linux 2.0.x, only utime. */
1139        struct rusage  r;
1140        if (getrusage (RUSAGE_SELF, &r) != 0)
1141          perror ("getrusage");
1142        else
1143          printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1144                  r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1145                  r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1146      }
1147#else
1148      printf ("getrusage() not available\n");
1149#endif
1150
1151      /* Linux kernel. */
1152      {
1153        char  buf[128];
1154        sprintf (buf, "/proc/%d/status", getpid());
1155        if (access (buf, R_OK) == 0)
1156          {
1157            sprintf (buf, "cat /proc/%d/status", getpid());
1158            system (buf);
1159          }
1160
1161      }
1162    }
1163
1164  return 0;
1165}
1166
1167
Note: See TracBrowser for help on using the repository browser.