source: trunk/third/gmp/tune/time.c @ 18191

Revision 18191, 39.9 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18190, which included commits to RCS files with non-trunk default branches.
Line 
1/* Time routines for speed measurements.
2
3Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of the GNU Lesser General Public License as published by
9the Free Software Foundation; either version 2.1 of the License, or (at your
10option) any later version.
11
12The GNU MP Library is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15License for more details.
16
17You should have received a copy of the GNU Lesser General Public License
18along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20MA 02111-1307, USA. */
21
22
23/* speed_time_init() - initialize timing things.  speed_starttime() calls
24   this if it hasn't been done yet, so you only need to call this explicitly
25   if you want to use the global variables before the first measurement.
26 
27   speed_starttime() - start a time measurement.
28
29   speed_endtime() - end a time measurement, return time taken (seconds or
30   cycles).
31
32   speed_time_string - a string describing the time method in use.
33
34   speed_unittime - global variable with the unit of time measurement
35   accuracy (seconds or cycles).
36
37   speed_precision - global variable which is the intended accuracy of time
38   measurements.  speed_measure() for instance runs target routines with
39   enough repetitions so it takes at least speed_unittime*speed_precision.
40   A program can provide an option so the user can set this, otherwise it
41   gets a default based on the measuring method chosen.
42
43   speed_cycletime - the time in seconds for each CPU cycle, for example on
44   a 100 MHz CPU this would be 1.0e-8.  If the CPU frequency is unknown this
45   is 0.0 if the time base is in seconds, or 1.0 if it's in cycles.
46
47
48   speed_endtime() and speed_unittime are normally in seconds, but if a
49   cycle counter is being used to measure and the CPU frequency is unknown,
50   then speed_endtime() returns cycles and speed_cycletime and
51   speed_unittime are 1.0.
52
53   Notice that speed_unittime*speed_precision is the target duration for
54   speed_endtime(), irrespective of whether that's in seconds or cycles.
55
56   Call speed_cycletime_need_seconds() to demand that speed_endtime() is in
57   seconds and not perhaps in cycles.
58
59   Call speed_cycletime_need_cycles() to demand that speed_cycletime is
60   non-zero, so that speed_endtime()/speed_cycletime will work to give times
61   in cycles.
62
63
64   Notes:
65
66   Various combinations of cycle counter, read_real_time(), getrusage(),
67   gettimeofday() and times() can arise, according to which are available
68   and their precision.
69
70
71   Allowing speed_endtime() to return either seconds or cycles is only a
72   slight complication and makes it possible for the speed program to do
73   some sensible things without demanding the CPU frequency.  If seconds are
74   being measured then it can always print seconds, and if cycles are being
75   measured then it can always print them without needing to know how long
76   they are.  Also the tune program doesn't care at all what the units are.
77
78   GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
79   fail.  This will be needed if times in seconds are wanted but a cycle
80   counter is being used, or if times in cycles are wanted but getrusage or
81   another seconds based timer is in use.
82
83   If the measuring method uses a cycle counter but supplements it with
84   getrusage or the like, then knowing the CPU frequency is mandatory since
85   the code compares values from the two.
86
87
88   Solaris gethrtime() seems no more than a slow way to access the Sparc V9
89   cycle counter.  gethrvtime() seems to be relevant only to LWP, it doesn't
90   for instance give nanosecond virtual time.  So neither of these are used.
91
92
93   Bugs:
94
95   getrusage_microseconds_p is fundamentally flawed, getrusage and
96   gettimeofday can have resolutions other than clock ticks or microseconds,
97   for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
98
99   Enhancements:
100
101   The SGI hardware counter has 64 bits on some machines, which could be
102   used when available.  But perhaps 32 bits is enough range, and then rely
103   on the getrusage supplement.
104
105   Maybe getrusage (or times) should be used as a supplement for any
106   wall-clock measuring method.  Currently a wall clock with a good range
107   (eg. a 64-bit cycle counter) is used without a supplement.
108
109   On PowerPC the timebase registers could be used, but would have to do
110   something to find out the speed.  On 6xx chips it's normally 1/4 bus
111   speed, on 4xx chips it's either that or an external clock.  Measuring
112   against gettimeofday might be ok.  */
113
114
115#include "config.h"
116
117#include <errno.h>
118#include <setjmp.h>
119#include <signal.h>
120#include <stddef.h>
121#include <stdio.h>
122#include <string.h>
123#include <stdlib.h> /* for getenv() */
124
125#if HAVE_FCNTL_H
126#include <fcntl.h>  /* for open() */
127#endif
128
129#if HAVE_STDINT_H
130#include <stdint.h> /* for uint64_t */
131#endif
132
133#if HAVE_UNISTD_H
134#include <unistd.h> /* for sysconf() */
135#endif
136
137#include <sys/types.h>
138
139#if TIME_WITH_SYS_TIME
140# include <sys/time.h>  /* for struct timeval */
141# include <time.h>
142#else
143# if HAVE_SYS_TIME_H
144#  include <sys/time.h>
145# else
146#  include <time.h>
147# endif
148#endif
149
150#if HAVE_SYS_MMAN_H
151#include <sys/mman.h>      /* for mmap() */
152#endif
153
154#if HAVE_SYS_RESOURCE_H
155#include <sys/resource.h>  /* for struct rusage */
156#endif
157
158#if HAVE_SYS_SYSSGI_H
159#include <sys/syssgi.h>    /* for syssgi() */
160#endif
161
162#if HAVE_SYS_SYSTEMCFG_H
163#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
164#endif
165
166#if HAVE_SYS_TIMES_H
167#include <sys/times.h>  /* for times() and struct tms */
168#endif
169
170#include "gmp.h"
171#include "gmp-impl.h"
172
173#include "speed.h"
174
175
176
177char    speed_time_string[256];
178int     speed_precision = 0;
179double  speed_unittime;
180double  speed_cycletime = 0.0;
181
182
183/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
184   native cc */
185#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)
186
187#define M_2POW32  4294967296.0
188#define M_2POW64  (M_2POW32 * M_2POW32)
189
190
191/* Conditionals for the time functions available are done with normal C
192   code, which is a lot easier than wildly nested preprocessor directives.
193
194   The choice of what to use is partly made at run-time, according to
195   whether the cycle counter works and the measured accuracy of getrusage
196   and gettimeofday.
197
198   A routine that's not available won't be getting called, but is an abort()
199   to be sure it isn't called mistakenly.
200
201   It can be assumed that if a function exists then its data type will, but
202   if the function doesn't then the data type might or might not exist, so
203   the type can't be used unconditionally.  The "struct_rusage" etc macros
204   provide dummies when the respective function doesn't exist. */
205
206
207#if HAVE_SPEED_CYCLECOUNTER
208static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
209#else
210static const int have_cycles = 0;
211#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
212#endif
213
214/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
215   microseconds.  Same #ifdefs here as in longlong.h.  */
216#if defined (__GNUC__) && ! defined (NO_ASM)                            \
217  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
218static const int  have_stck = 1;
219static const int  use_stck = 1;  /* always use when available */
220typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
221#define STCK(timestamp)                 \
222  do {                                  \
223    asm ("stck %0" : "=m" (timestamp)); \
224  } while (0)
225#else
226static const int  have_stck = 0;
227static const int  use_stck = 0;
228typedef unsigned long  stck_t;   /* dummy */
229#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
230#endif
231#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
232
233/* Unicos 10.X has syssgi(), but not mmap(). */
234#if HAVE_SYSSGI && HAVE_MMAP
235static const int  have_sgi = 1;
236#else
237static const int  have_sgi = 0;
238#endif
239
240#if HAVE_READ_REAL_TIME
241static const int have_rrt = 1;
242#else
243static const int have_rrt = 0;
244#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
245#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
246#define RTC_POWER     1
247#define RTC_POWER_PC  2
248#define timebasestruct_t   struct timebasestruct_dummy
249struct timebasestruct_dummy {
250  int             flag;
251  unsigned int    tb_high;
252  unsigned int    tb_low;
253};
254#endif
255
256#if HAVE_CLOCK_GETTIME
257static const int have_cgt = 1;
258#define struct_timespec  struct timespec
259#else
260static const int have_cgt = 0;
261#define struct_timespec       struct timespec_dummy
262#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
263#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
264#endif
265
266#if HAVE_GETRUSAGE
267static const int have_grus = 1;
268#define struct_rusage   struct rusage
269#else
270static const int have_grus = 0;
271#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
272#define struct_rusage    struct rusage_dummy
273#endif
274
275#if HAVE_GETTIMEOFDAY
276static const int have_gtod = 1;
277#define struct_timeval   struct timeval
278#else
279static const int have_gtod = 0;
280#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
281#define struct_timeval   struct timeval_dummy
282#endif
283
284#if HAVE_TIMES
285static const int have_times = 1;
286#define struct_tms   struct tms
287#else
288static const int have_times = 0;
289#define times(tms)   ASSERT_FAIL (times not available)
290#define struct_tms   struct tms_dummy
291#endif
292
293struct tms_dummy {
294  long  tms_utime;
295};
296struct timeval_dummy {
297  long  tv_sec;
298  long  tv_usec;
299};
300struct rusage_dummy {
301  struct_timeval ru_utime;
302};
303struct timespec_dummy {
304  long  tv_sec;
305  long  tv_nsec;
306};
307
308static int  use_cycles;
309static int  use_sgi;
310static int  use_rrt;
311static int  use_cgt;
312static int  use_gtod;
313static int  use_grus;
314static int  use_times;
315static int  use_tick_boundary;
316
317static unsigned         start_cycles[2];
318static stck_t           start_stck;
319static unsigned         start_sgi;
320static timebasestruct_t start_rrt;
321static struct_timespec  start_cgt;
322static struct_rusage    start_grus;
323static struct_timeval   start_gtod;
324static struct_tms       start_times;
325
326static double  cycles_limit = 1e100;
327static double  sgi_unittime;
328static double  cgt_unittime;
329static double  grus_unittime;
330static double  gtod_unittime;
331static double  times_unittime;
332
333/* for RTC_POWER format, ie. seconds and nanoseconds */
334#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
335
336
/* Return a string representing a time in seconds, nicely formatted with
   four significant figures and a unit suffix, eg. "10.25ms".

   Values below 1 second are scaled to "ms", "us" or "ns" as appropriate.

   The result is in a static buffer which is overwritten by each call, so
   it must be used or copied before calling again.

   A magnitude of 1e15 or more (after scaling) is shown in exponent form,
   eg. "1.000e+300s".  "%f" would print every integer digit of such a value,
   up to about 309 of them for a double, and run past the end of the
   buffer.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    t *= 1e9, unit = "ns";
  else if (t < 1e-3)
    t *= 1e6, unit = "us";
  else if (t < 1.0)
    t *= 1e3, unit = "ms";
  else
    unit = "s";

  /* no sensible time is this big, but don't let it overflow buf */
  if (t >= 1e15 || t <= -1e15)
    {
      sprintf (buf, "%.3e%s", t, unit);
      return buf;
    }

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  sprintf (buf, "%.*f%s", prec, t, unit);
  return buf;
}
370
371
372static jmp_buf  cycles_works_buf;
373
374static RETSIGTYPE
375cycles_works_handler (int sig)
376{
377  longjmp (cycles_works_buf, 1);
378}
379
380int
381cycles_works_p (void)
382{
383  static int  result = -1;
384  RETSIGTYPE (*old_handler) _PROTO ((int));
385  unsigned  cycles[2];
386
387  /* suppress a warning about cycles[] unused */
388  cycles[0] = 0;
389
390  if (result != -1)
391    goto done;
392
393#ifdef SIGILL
394  old_handler = signal (SIGILL, cycles_works_handler);
395  if (old_handler == SIG_ERR)
396    {
397      if (speed_option_verbose)
398        printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
399      goto yes;
400    }
401  if (setjmp (cycles_works_buf))
402    {
403      if (speed_option_verbose)
404        printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
405      result = 0;
406      goto done;
407    }
408  speed_cyclecounter (cycles);
409  signal (SIGILL, old_handler);
410  if (speed_option_verbose)
411    printf ("cycles_works_p(): speed_cyclecounter() works\n");
412#else
413
414  if (speed_option_verbose)
415    printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
416#endif
417
418 yes:
419  result = 1;
420
421 done:
422  return result;
423}
424
425
/* Return the number of clock ticks per second.

   sysconf(_SC_CLK_TCK) is asked first when sysconf exists, and the CLK_TCK
   macro is the fallback.  If neither is available the program aborts.  The
   value is established on the first call and remembered after that.  */
long
clk_tck (void)
{
  static long  ticks = -1L;

  if (ticks == -1L)
    {
#if HAVE_SYSCONF
      ticks = sysconf (_SC_CLK_TCK);
      if (ticks != -1L)
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
          return ticks;
        }

      fprintf (stderr,
               "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      ticks = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks;
}
458
459
460/* If two times can be observed less than half a clock tick apart, then
461   assume "get" is microsecond accurate.
462
463   Two times only 1 microsecond apart are not believed, since some kernels
464   take it upon themselves to ensure gettimeofday doesn't return the same
465   value twice, for the benefit of applications using it for a timestamp.
466   This is obviously very stupid given the speed of CPUs these days.
467
468   Making "reps" calls to noop_1() is designed to waste some CPU, with a
469   view to getting measurements 2 microseconds (or more) apart.  "reps" is
470   increased progressively until such a period is seen.
471
472   The outer loop "attempts" are just to allow for any random nonsense or
473   system load upsetting the measurements (ie. making two successive calls
474   to "get" come out as a longer interval than normal).
475
476   Bugs:
477
478   The assumption that any interval less than a half tick implies
479   microsecond resolution is obviously fairly rash, the true resolution
480   could be anything between a microsecond and that half tick.  Perhaps
481   something special would have to be done on a system where this is the
482   case, since there's no obvious reliable way to detect it
483   automatically.  */
484
/* Function body for a "foo_microseconds_p" routine.  "name" is a string for
   messages, "type" is the timestamp type, "get" fetches a timestamp into an
   lvalue, and "sec" and "usec" extract its seconds and microseconds fields.
   The body returns 1 if an interval of at least 2 microseconds but less
   than half a clock tick is observed, or 0 if not, and remembers the answer
   for later calls.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  cached = -1;                                            \
    type      t_begin, t_end;                                           \
    long      elapsed_us, half_tick_us;                                 \
    unsigned  trial, spins, outer, inner;                               \
                                                                        \
    if (cached != -1)                                                   \
      return cached;                                                    \
                                                                        \
    /* tick accurate, unless shown otherwise below */                   \
    cached = 0;                                                         \
    half_tick_us = (1000000L / clk_tck ()) / 2;                         \
                                                                        \
    for (trial = 0; trial < 5; trial++)                                 \
      {                                                                 \
        spins = 0;                                                      \
        do                                                              \
          {                                                             \
            get (t_begin);                                              \
            for (outer = 0; outer < spins; outer++)                     \
              for (inner = 0; inner < 100; inner++)                     \
                noop_1 (CNST_LIMB(0));                                  \
            get (t_end);                                                \
                                                                        \
            elapsed_us = (sec(t_end)-sec(t_begin))*1000000L             \
              + usec(t_end)-usec(t_begin);                              \
                                                                        \
            if (speed_option_verbose >= 2)                              \
              printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
                      name, trial, spins, elapsed_us);                  \
                                                                        \
            if (elapsed_us >= 2)                                        \
              break;                                                    \
                                                                        \
            /* waste progressively more CPU between the two reads */    \
            if (spins == 0)                                             \
              spins = 1;                                                \
            else                                                        \
              spins *= 2;                                               \
          }                                                             \
        while (spins != 0);  /* zero is uint overflow, not normal */    \
                                                                        \
        if (elapsed_us < half_tick_us)                                  \
          {                                                             \
            cached = 1;                                                 \
            break;                                                      \
          }                                                             \
      }                                                                 \
                                                                        \
    if (speed_option_verbose)                                           \
      {                                                                 \
        if (cached)                                                     \
          printf ("%s is microsecond accurate\n", name);                \
        else                                                            \
          printf ("%s is only %s clock tick accurate\n",                \
                  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return cached;                                                      \
  }
540
541
542int
543gettimeofday_microseconds_p (void)
544{
545#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
546#define timeval_tv_sec(t)      ((t).tv_sec)
547#define timeval_tv_usec(t)     ((t).tv_usec)
548  MICROSECONDS_P ("gettimeofday", struct_timeval,
549                  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
550}
551
552int
553getrusage_microseconds_p (void)
554{
555#define call_getrusage(t)   getrusage (0, &(t))
556#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
557#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
558  MICROSECONDS_P ("getrusage", struct_rusage,
559                  call_getrusage, rusage_tv_sec, rusage_tv_usec);
560}
561
562
563/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
564   of glibc (some time post 2.2).
565
566   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
567   defined, but returning -1 for an error).  */
568
569#ifdef CLOCK_PROCESS_CPUTIME_ID
570# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
571#else
572# ifdef CLOCK_VIRTUAL
573#  define CGT_ID       CLOCK_VIRTUAL
574# endif
575#endif
576#ifdef CGT_ID
577# define HAVE_CGT_ID  1
578#else
579# define HAVE_CGT_ID  0
580# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
581#endif
582
583int
584cgt_works_p (void)
585{
586  static int  result = -1;
587  struct_timespec  unit;
588
589  if (! have_cgt)
590    return 0;
591
592  if (! HAVE_CGT_ID)
593    {
594      if (speed_option_verbose)
595        printf ("clock_gettime don't know what ID to use\n");
596      result = 0;
597      return result;
598    }
599
600  if (result != -1)
601    return result;
602
603  /* trial run to see if it works */
604  if (clock_gettime (CGT_ID, &unit) != 0)
605    {
606      if (speed_option_verbose)
607        printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
608      result = 0;
609      return result;
610    }
611
612  /* get the resolution */
613  if (clock_getres (CGT_ID, &unit) != 0)
614    {
615      if (speed_option_verbose)
616        printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
617      result = 0;
618      return result;
619    }
620
621  cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
622  printf ("clock_gettime is %s accurate\n",
623          unittime_string (cgt_unittime));
624  result = 1;
625  return result;
626}
627
628
629volatile unsigned  *sgi_addr;
630
/* Return 1 if the syssgi() cycle counter has been mapped into memory and
   can be read through sgi_addr, or 0 if not.  Always 0 when syssgi() or
   mmap() is not available.

   On success sgi_unittime is the counter period in seconds, from the
   picoseconds value filled in by syssgi SGI_QUERY_CYCLECNTR, and sgi_addr
   points at the low "unsigned" of the counter within a read-only mapping
   of /dev/mmem.

   The setup is attempted once and the answer remembered.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd, mmap_errno;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          /* size is in bits here, the same as the check just below */
          printf ("    will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  mmap_errno = errno;  /* keep it, in case close() changes errno */

  /* The descriptor is no longer wanted whether or not mmap worked, a
     mapping stays in place after its file descriptor is closed.  */
  close (fd);

  if (virtpage == (void *) -1)
    {
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (mmap_errno));
      result = 0;
      return result;
    }

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
717
718
719#define DEFAULT(var,n)  \
720  do {                  \
721    if (! (var))        \
722      (var) = (n);      \
723  } while (0)
724
725void
726speed_time_init (void)
727{
728  double supplement_unittime = 0.0;
729
730  static int  speed_time_initialized = 0;
731  if (speed_time_initialized)
732    return;
733  speed_time_initialized = 1;
734
735  speed_cycletime_init ();
736
737  if (have_cycles && cycles_works_p ())
738    {
739      use_cycles = 1;
740      DEFAULT (speed_cycletime, 1.0);
741      speed_unittime = speed_cycletime;
742      DEFAULT (speed_precision, 10000);
743      strcpy (speed_time_string, "CPU cycle counter");
744
745      /* only used if a supplementary method is chosen below */
746      cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
747        * speed_cycletime;
748
749      if (have_grus && getrusage_microseconds_p())
750        {
751          /* this is a good combination */
752          use_grus = 1;
753          supplement_unittime = grus_unittime = 1.0e-6;
754          strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
755        }
756      else if (have_cycles == 1)
757        {
758          /* When speed_cyclecounter has a limited range, look for something
759             to supplement it. */
760          if (have_gtod && gettimeofday_microseconds_p())
761            {
762              use_gtod = 1;
763              supplement_unittime = gtod_unittime = 1.0e-6;
764              strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
765            }
766          else if (have_grus)
767            {
768              use_grus = 1;
769              supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
770              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
771            }
772          else if (have_times)
773            {
774              use_times = 1;
775              supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
776              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
777            }
778          else if (have_gtod)
779            {
780              use_gtod = 1;
781              supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
782              sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
783            }
784          else
785            {
786              fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
787              fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
788            }
789        }
790
791      if (use_grus || use_times || use_gtod)
792        {
793          /* must know cycle period to compare cycles to other measuring
794             (via cycles_limit) */
795          speed_cycletime_need_seconds ();
796
797          if (speed_precision * supplement_unittime > cycles_limit)
798            {
799              fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
800              fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
801              fprintf (stderr, "    (%s)\n", speed_time_string);
802            }
803        }
804    }
805  else if (have_stck)
806    {
807      strcpy (speed_time_string, "STCK timestamp");
808      /* stck is in units of 2^-12 microseconds, which is very likely higher
809         resolution than a cpu cycle */
810      if (speed_cycletime == 0.0)
811        speed_cycletime_fail
812          ("Need to know CPU frequency for effective stck unit");
813      speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
814      DEFAULT (speed_precision, 10000);
815    }
816  else if (have_sgi && sgi_works_p ())
817    {
818      use_sgi = 1;
819      DEFAULT (speed_precision, 10000);
820      speed_unittime = sgi_unittime;
821      sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
822               unittime_string (speed_unittime));
823      /* supplemented with getrusage, which we assume to have 1ms resolution */
824      use_grus = 1;
825      supplement_unittime = 1e-3;
826    }
827  else if (have_rrt)
828    {
829      timebasestruct_t  t;
830      use_rrt = 1;
831      DEFAULT (speed_precision, 10000);
832      read_real_time (&t, sizeof(t));
833      switch (t.flag) {
834      case RTC_POWER:
835        /* FIXME: What's the actual RTC resolution? */
836        speed_unittime = 1e-7;
837        strcpy (speed_time_string, "read_real_time() power nanoseconds");
838        break;
839      case RTC_POWER_PC:
840        t.tb_high = 1;
841        t.tb_low = 0;
842        time_base_to_time (&t, sizeof(t));
843        speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
844        sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
845                 unittime_string (speed_unittime));
846        break;
847      default:
848        fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
849                 t.flag);
850        abort ();
851      }
852    }
853  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
854    {
855      /* use clock_gettime if microsecond or better resolution */
856    choose_cgt:
857      use_cgt = 1;
858      speed_unittime = cgt_unittime;
859      DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
860      strcpy (speed_time_string, "microsecond accurate getrusage()");
861    }
862  else if (have_grus && getrusage_microseconds_p())
863    {
864      use_grus = 1;
865      speed_unittime = grus_unittime = 1.0e-6;
866      DEFAULT (speed_precision, 1000);
867      strcpy (speed_time_string, "microsecond accurate getrusage()");
868    }
869  else if (have_gtod && gettimeofday_microseconds_p())
870    {
871      use_gtod = 1;
872      speed_unittime = gtod_unittime = 1.0e-6;
873      DEFAULT (speed_precision, 1000);
874      strcpy (speed_time_string, "microsecond accurate gettimeofday()");
875    }
876  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
877    {
878      /* use clock_gettime if 1 tick or better resolution */
879      goto choose_cgt;
880    }
881  else if (have_times)
882    {
883      use_times = 1;
884      use_tick_boundary = 1;
885      speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
886      DEFAULT (speed_precision, 200);
887      sprintf (speed_time_string, "%s clock tick times()",
888               unittime_string (speed_unittime));
889    }
890  else if (have_grus)
891    {
892      use_grus = 1;
893      use_tick_boundary = 1;
894      speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
895      DEFAULT (speed_precision, 200);
896      sprintf (speed_time_string, "%s clock tick getrusage()\n",
897               unittime_string (speed_unittime));
898    }
899  else if (have_gtod)
900    {
901      use_gtod = 1;
902      use_tick_boundary = 1;
903      speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
904      DEFAULT (speed_precision, 200);
905      sprintf (speed_time_string, "%s clock tick gettimeofday()",
906               unittime_string (speed_unittime));
907    }
908  else
909    {
910      fprintf (stderr, "No time measuring method available\n");
911      fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
912      abort ();
913    }
914
915  if (speed_option_verbose)
916    {
917      printf ("speed_time_init: %s\n", speed_time_string);
918      printf ("    speed_precision     %d\n", speed_precision);
919      printf ("    speed_unittime      %.2g\n", speed_unittime);
920      if (supplement_unittime)
921        printf ("    supplement_unittime %.2g\n", supplement_unittime);
922      printf ("    use_tick_boundary   %d\n", use_tick_boundary);
923      if (have_cycles)
924        printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
925    }
926}
927
928
929
930/* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
931   corresponding "start_foo" appropriately too. */
932
933void
934grus_tick_boundary (void)
935{
936  struct_rusage  prev;
937  getrusage (0, &prev);
938  do {
939    getrusage (0, &start_grus);
940  } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
941}
942
943void
944gtod_tick_boundary (void)
945{
946  struct_timeval  prev;
947  gettimeofday (&prev, NULL);
948  do {
949    gettimeofday (&start_gtod, NULL);
950  } while (start_gtod.tv_usec == prev.tv_usec);
951}
952
953void
954times_tick_boundary (void)
955{
956  struct_tms  prev;
957  times (&prev);
958  do
959    times (&start_times);
960  while (start_times.tms_utime == prev.tms_utime);
961}
962
963
964/* "have_" values are tested to let unused code go dead.  */
965
966void
967speed_starttime (void)
968{
969  speed_time_init ();
970
971  if (have_grus && use_grus)
972    {
973      if (use_tick_boundary)
974        grus_tick_boundary ();
975      else
976        getrusage (0, &start_grus);
977    }
978
979  if (have_gtod && use_gtod)
980    {
981      if (use_tick_boundary)
982        gtod_tick_boundary ();
983      else
984        gettimeofday (&start_gtod, NULL);
985    }
986
987  if (have_times && use_times)
988    {
989      if (use_tick_boundary)
990        times_tick_boundary ();
991      else
992        times (&start_times);
993    }
994
995  if (have_cgt && use_cgt)
996    clock_gettime (CGT_ID, &start_cgt);
997
998  if (have_rrt && use_rrt)
999    read_real_time (&start_rrt, sizeof(start_rrt));
1000
1001  if (have_sgi && use_sgi)
1002    start_sgi = *sgi_addr;
1003
1004  if (have_stck && use_stck)
1005    STCK (start_stck);
1006
1007  /* Cycles sampled last for maximum accuracy. */
1008  if (have_cycles && use_cycles)
1009    speed_cyclecounter (start_cycles);
1010}
1011
1012
1013/* Calculate the difference between two cycle counter samples, as a "double"
1014   counter of cycles.
1015
1016   The start and end values are allowed to cancel in integers in case the
1017   counter values are bigger than the 53 bits that normally fit in a double.
1018
1019   This works even if speed_cyclecounter() puts a value bigger than 32-bits
1020   in the low word (the high word always gets a 2**32 multiplier though). */
1021
1022double
1023speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1024{
1025  unsigned  d;
1026  double    t;
1027
1028  if (have_cycles == 1)
1029    {
1030      t = (end[0] - start[0]);
1031    }
1032  else
1033    {
1034      d = end[0] - start[0];
1035      t = d - (d > end[0] ? M_2POWU : 0.0);
1036      t += (end[1] - start[1]) * M_2POW32;
1037    }
1038  return t;
1039}
1040
1041
/* Function body calculating the difference between "start" and "end" using
   fields "sec" and "psec", where each "psec" is a "punit" of a second.
   The expansion is a complete braced body ending in a "return", and it
   expects pointers called "end" and "start" to be in scope.

   The seconds parts are allowed to cancel before being combined with the
   psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
   double.

   Total time is only calculated in a "double" since an integer count of
   psecs might overflow.  2^32 microseconds is only a bit over an hour, or
   2^32 nanoseconds only about 4 seconds.

   The casts to "long" are for the benefit of timebasestruct_t, where the
   fields are only "unsigned int", but we want a signed difference.

   "sec" and "psec" are member designators so cannot be parenthesized, but
   "punit" is an expression and is parenthesized so that any argument, not
   just a simple constant, multiplies as a unit.  */

#define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
  {                                                             \
    long  sec_diff, psec_diff;                                  \
    sec_diff = (long) end->sec - (long) start->sec;             \
    psec_diff = (long) end->psec - (long) start->psec;          \
    return (double) sec_diff + (punit) * (double) psec_diff;    \
  }
1063
1064double
1065timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1066{
1067  DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1068}
1069
1070double
1071rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1072{
1073  DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1074}
1075
1076double
1077timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1078{
1079  DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1080}
1081
1082/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1083double
1084timebasestruct_diff_secs (const timebasestruct_t *end,
1085                          const timebasestruct_t *start)
1086{
1087  DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1088}
1089
1090
1091double
1092speed_endtime (void)
1093{
1094#define END_USE(name,value)                             \
1095  do {                                                  \
1096    if (speed_option_verbose >= 3)                      \
1097      printf ("speed_endtime(): used %s\n", name);      \
1098    result = value;                                     \
1099    goto done;                                          \
1100  } while (0)
1101
1102#define END_ENOUGH(name,value)                                          \
1103  do {                                                                  \
1104    if (speed_option_verbose >= 3)                                      \
1105      printf ("speed_endtime(): %s gives enough precision\n", name);    \
1106    result = value;                                                     \
1107    goto done;                                                          \
1108  } while (0)
1109
1110#define END_EXCEED(name,value)                                            \
1111  do {                                                                    \
1112    if (speed_option_verbose >= 3)                                        \
1113      printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1114              name);                                                      \
1115    result = value;                                                       \
1116    goto done;                                                            \
1117  } while (0)
1118
1119  unsigned          end_cycles[2];
1120  stck_t            end_stck;
1121  unsigned          end_sgi;
1122  timebasestruct_t  end_rrt;
1123  struct_timespec   end_cgt;
1124  struct_timeval    end_gtod;
1125  struct_rusage     end_grus;
1126  struct_tms        end_times;
1127  double            t_gtod, t_grus, t_times, t_cgt;
1128  double            t_rrt, t_sgi, t_stck, t_cycles;
1129  double            result;
1130
1131  /* Cycles sampled first for maximum accuracy.
1132     "have_" values tested to let unused code go dead.  */
1133
1134  if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
1135  if (have_stck   && use_stck)    STCK (end_stck);
1136  if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
1137  if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
1138  if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
1139  if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
1140  if (have_grus   && use_grus)    getrusage (0, &end_grus);
1141  if (have_times  && use_times)   times (&end_times);
1142
1143  result = -1.0;
1144
1145  if (speed_option_verbose >= 4)
1146    {
1147      printf ("speed_endtime():\n");
1148      if (use_cycles)
1149        printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
1150                start_cycles[1], start_cycles[0],
1151                end_cycles[1], end_cycles[0]);
1152
1153      if (use_stck)
1154        printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
1155
1156      if (use_sgi)
1157        printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
1158
1159      if (use_rrt)
1160        printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
1161                start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1162                end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1163
1164      if (use_cgt)
1165        printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
1166                start_cgt.tv_sec, start_cgt.tv_nsec,
1167                end_cgt.tv_sec, end_cgt.tv_nsec);
1168
1169      if (use_gtod)
1170        printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
1171                start_gtod.tv_sec, start_gtod.tv_usec,
1172                end_gtod.tv_sec, end_gtod.tv_usec);
1173
1174      if (use_grus)
1175        printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
1176                start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
1177                end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
1178
1179      if (use_times)
1180        printf ("   times  %ld -> %ld\n",
1181                start_times.tms_utime, end_times.tms_utime);
1182    }
1183
1184  if (use_rrt)
1185    {
1186      time_base_to_time (&start_rrt, sizeof(start_rrt));
1187      time_base_to_time (&end_rrt, sizeof(end_rrt));
1188      t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1189      END_USE ("read_real_time()", t_rrt);
1190    }
1191
1192  if (use_cgt)
1193    {
1194      t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1195      END_USE ("clock_gettime()", t_cgt);
1196    }
1197
1198  if (use_grus)
1199    {
1200      t_grus = rusage_diff_secs (&end_grus, &start_grus);
1201
1202      /* Use getrusage() if the cycle counter limit would be exceeded, or if
1203         it provides enough accuracy already. */
1204      if (use_cycles) 
1205        {
1206          if (t_grus >= speed_precision*grus_unittime)
1207            END_ENOUGH ("getrusage()", t_grus);
1208          if (t_grus >= cycles_limit)
1209            END_EXCEED ("getrusage()", t_grus);
1210        }
1211    }
1212
1213  if (use_times)
1214    {
1215      t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1216
1217      /* Use times() if the cycle counter limit would be exceeded, or if
1218         it provides enough accuracy already. */
1219      if (use_cycles) 
1220        {
1221          if (t_times >= speed_precision*times_unittime)
1222            END_ENOUGH ("times()", t_times);
1223          if (t_times >= cycles_limit)
1224            END_EXCEED ("times()", t_times);
1225        }
1226    }
1227
1228  if (use_gtod)
1229    {
1230      t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1231
1232      /* Use gettimeofday() if it measured a value bigger than the cycle
1233         counter can handle.  */
1234      if (use_cycles) 
1235        {
1236          if (t_gtod >= cycles_limit)
1237            END_EXCEED ("gettimeofday()", t_gtod);
1238        }
1239    }
1240 
1241  if (use_stck) 
1242    {
1243      t_stck = (end_stck - start_stck) * STCK_PERIOD;
1244      END_USE ("stck", t_stck);
1245    }
1246
1247  if (use_sgi)
1248    {
1249      t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1250      END_USE ("SGI hardware counter", t_sgi);
1251    }
1252
1253  if (use_cycles) 
1254    {
1255      t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1256        * speed_cycletime;
1257      END_USE ("cycle counter", t_cycles);
1258    }
1259
1260  if (use_grus && getrusage_microseconds_p())
1261    END_USE ("getrusage()", t_grus);
1262
1263  if (use_gtod && gettimeofday_microseconds_p())
1264    END_USE ("gettimeofday()", t_gtod);
1265
1266  if (use_times)  END_USE ("times()",        t_times);
1267  if (use_grus)   END_USE ("getrusage()",    t_grus);
1268  if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
1269
1270  fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1271  abort ();
1272
1273 done:
1274  if (result < 0.0)
1275    {
1276      fprintf (stderr,
1277               "speed_endtime(): fatal error: negative time measured: %.9f\n",
1278               result);
1279      abort ();
1280    }
1281  return result;
1282}
Note: See TracBrowser for help on using the repository browser.