1 | /* Time routines for speed measurements. |
---|
2 | |
---|
3 | Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
---|
4 | |
---|
5 | This file is part of the GNU MP Library. |
---|
6 | |
---|
7 | The GNU MP Library is free software; you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU Lesser General Public License as published by |
---|
9 | the Free Software Foundation; either version 2.1 of the License, or (at your |
---|
10 | option) any later version. |
---|
11 | |
---|
12 | The GNU MP Library is distributed in the hope that it will be useful, but |
---|
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
---|
15 | License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Lesser General Public License |
---|
18 | along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
---|
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
---|
20 | MA 02111-1307, USA. */ |
---|
21 | |
---|
22 | |
---|
23 | /* speed_time_init() - initialize timing things. speed_starttime() calls |
---|
24 | this if it hasn't been done yet, so you only need to call this explicitly |
---|
25 | if you want to use the global variables before the first measurement. |
---|
26 | |
---|
27 | speed_starttime() - start a time measurement. |
---|
28 | |
---|
29 | speed_endtime() - end a time measurement, return time taken (seconds or |
---|
30 | cycles). |
---|
31 | |
---|
32 | speed_time_string - a string describing the time method in use. |
---|
33 | |
---|
34 | speed_unittime - global variable with the unit of time measurement |
---|
35 | accuracy (seconds or cycles). |
---|
36 | |
---|
37 | speed_precision - global variable which is the intended accuracy of time |
---|
38 | measurements. speed_measure() for instance runs target routines with |
---|
39 | enough repetitions so it takes at least speed_unittime*speed_precision. |
---|
40 | A program can provide an option so the user can set this, otherwise it |
---|
41 | gets a default based on the measuring method chosen. |
---|
42 | |
---|
43 | speed_cycletime - the time in seconds for each CPU cycle, for example on |
---|
44 | a 100 MHz CPU this would be 1.0e-8. If the CPU frequency is unknown this |
---|
45 | is 0.0 if the time base is in seconds, or 1.0 if it's in cycles. |
---|
46 | |
---|
47 | |
---|
48 | speed_endtime() and speed_unittime are normally in seconds, but if a |
---|
49 | cycle counter is being used to measure and the CPU frequency is unknown, |
---|
50 | then speed_endtime() returns cycles and speed_cycletime and |
---|
51 | speed_unittime are 1.0. |
---|
52 | |
---|
53 | Notice that speed_unittime*speed_precision is the target duration for |
---|
54 | speed_endtime(), irrespective of whether that's in seconds or cycles. |
---|
55 | |
---|
56 | Call speed_cycletime_need_seconds() to demand that speed_endtime() is in |
---|
57 | seconds and not perhaps in cycles. |
---|
58 | |
---|
59 | Call speed_cycletime_need_cycles() to demand that speed_cycletime is |
---|
60 | non-zero, so that speed_endtime()/speed_cycletime will work to give times |
---|
61 | in cycles. |
---|
62 | |
---|
63 | |
---|
64 | Notes: |
---|
65 | |
---|
66 | Various combinations of cycle counter, read_real_time(), getrusage(), |
---|
67 | gettimeofday() and times() can arise, according to which are available |
---|
68 | and their precision. |
---|
69 | |
---|
70 | |
---|
71 | Allowing speed_endtime() to return either seconds or cycles is only a |
---|
72 | slight complication and makes it possible for the speed program to do |
---|
73 | some sensible things without demanding the CPU frequency. If seconds are |
---|
74 | being measured then it can always print seconds, and if cycles are being |
---|
75 | measured then it can always print them without needing to know how long |
---|
76 | they are. Also the tune program doesn't care at all what the units are. |
---|
77 | |
---|
78 | GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c |
---|
79 | fail. This will be needed if times in seconds are wanted but a cycle |
---|
80 | counter is being used, or if times in cycles are wanted but getrusage or |
---|
81 | another seconds based timer is in use. |
---|
82 | |
---|
83 | If the measuring method uses a cycle counter but supplements it with |
---|
84 | getrusage or the like, then knowing the CPU frequency is mandatory since |
---|
85 | the code compares values from the two. |
---|
86 | |
---|
87 | |
---|
88 | Solaris gethrtime() seems no more than a slow way to access the Sparc V9 |
---|
89 | cycle counter. gethrvtime() seems to be relevant only to LWP, it doesn't |
---|
90 | for instance give nanosecond virtual time. So neither of these are used. |
---|
91 | |
---|
92 | |
---|
93 | Bugs: |
---|
94 | |
---|
95 | getrusage_microseconds_p is fundamentally flawed, getrusage and |
---|
96 | gettimeofday can have resolutions other than clock ticks or microseconds, |
---|
97 | for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms. |
---|
98 | |
---|
99 | Enhancements: |
---|
100 | |
---|
101 | The SGI hardware counter has 64 bits on some machines, which could be |
---|
102 | used when available. But perhaps 32 bits is enough range, and then rely |
---|
103 | on the getrusage supplement. |
---|
104 | |
---|
105 | Maybe getrusage (or times) should be used as a supplement for any |
---|
106 | wall-clock measuring method. Currently a wall clock with a good range |
---|
107 | (eg. a 64-bit cycle counter) is used without a supplement. |
---|
108 | |
---|
109 | On PowerPC the timebase registers could be used, but would have to do |
---|
110 | something to find out the speed. On 6xx chips it's normally 1/4 bus |
---|
111 | speed, on 4xx chips it's either that or an external clock. Measuring |
---|
112 | against gettimeofday might be ok. */ |
---|
113 | |
---|
114 | |
---|
115 | #include "config.h" |
---|
116 | |
---|
117 | #include <errno.h> |
---|
118 | #include <setjmp.h> |
---|
119 | #include <signal.h> |
---|
120 | #include <stddef.h> |
---|
121 | #include <stdio.h> |
---|
122 | #include <string.h> |
---|
123 | #include <stdlib.h> /* for getenv() */ |
---|
124 | |
---|
125 | #if HAVE_FCNTL_H |
---|
126 | #include <fcntl.h> /* for open() */ |
---|
127 | #endif |
---|
128 | |
---|
129 | #if HAVE_STDINT_H |
---|
130 | #include <stdint.h> /* for uint64_t */ |
---|
131 | #endif |
---|
132 | |
---|
133 | #if HAVE_UNISTD_H |
---|
134 | #include <unistd.h> /* for sysconf() */ |
---|
135 | #endif |
---|
136 | |
---|
137 | #include <sys/types.h> |
---|
138 | |
---|
139 | #if TIME_WITH_SYS_TIME |
---|
140 | # include <sys/time.h> /* for struct timeval */ |
---|
141 | # include <time.h> |
---|
142 | #else |
---|
143 | # if HAVE_SYS_TIME_H |
---|
144 | # include <sys/time.h> |
---|
145 | # else |
---|
146 | # include <time.h> |
---|
147 | # endif |
---|
148 | #endif |
---|
149 | |
---|
150 | #if HAVE_SYS_MMAN_H |
---|
151 | #include <sys/mman.h> /* for mmap() */ |
---|
152 | #endif |
---|
153 | |
---|
154 | #if HAVE_SYS_RESOURCE_H |
---|
155 | #include <sys/resource.h> /* for struct rusage */ |
---|
156 | #endif |
---|
157 | |
---|
158 | #if HAVE_SYS_SYSSGI_H |
---|
159 | #include <sys/syssgi.h> /* for syssgi() */ |
---|
160 | #endif |
---|
161 | |
---|
162 | #if HAVE_SYS_SYSTEMCFG_H |
---|
163 | #include <sys/systemcfg.h> /* for RTC_POWER on AIX */ |
---|
164 | #endif |
---|
165 | |
---|
166 | #if HAVE_SYS_TIMES_H |
---|
167 | #include <sys/times.h> /* for times() and struct tms */ |
---|
168 | #endif |
---|
169 | |
---|
170 | #include "gmp.h" |
---|
171 | #include "gmp-impl.h" |
---|
172 | |
---|
173 | #include "speed.h" |
---|
174 | |
---|
175 | |
---|
176 | |
---|
/* Description of the measuring method in use, filled in by
   speed_time_init().  */
char    speed_time_string[256];

/* Intended accuracy of measurements.  Zero means "not set", in which case
   speed_time_init() applies a default for the method chosen.  */
int     speed_precision = 0;

/* Unit of time measurement accuracy, set by speed_time_init().  */
double  speed_unittime;

/* Time for one CPU cycle.  Zero means "not known"; speed_time_init()
   defaults it to 1.0 when a cycle counter is used.  */
double  speed_cycletime = 0.0;


/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

/* 2^32 and 2^64 as doubles */
#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)
---|
189 | |
---|
190 | |
---|
191 | /* Conditionals for the time functions available are done with normal C |
---|
192 | code, which is a lot easier than wildly nested preprocessor directives. |
---|
193 | |
---|
194 | The choice of what to use is partly made at run-time, according to |
---|
195 | whether the cycle counter works and the measured accuracy of getrusage |
---|
196 | and gettimeofday. |
---|
197 | |
---|
198 | A routine that's not available won't be getting called, but is an abort() |
---|
199 | to be sure it isn't called mistakenly. |
---|
200 | |
---|
201 | It can be assumed that if a function exists then its data type will, but |
---|
202 | if the function doesn't then the data type might or might not exist, so |
---|
203 | the type can't be used unconditionally. The "struct_rusage" etc macros |
---|
204 | provide dummies when the respective function doesn't exist. */ |
---|
205 | |
---|
206 | |
---|
/* Cycle counter.  have_cycles keeps the configured value rather than a
   plain 0/1, since speed_time_init() treats the value 1 differently from
   other non-zero values when working out cycles_limit.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int  have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int  have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif

/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=m" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */

/* Unicos 10.X has syssgi(), but not mmap(). */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif

/* read_real_time().  When it's not available, stand-ins are provided for
   the functions, the flag constants and timebasestruct_t so that code
   referring to them still compiles.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif

/* clock_gettime() and clock_getres().  The stand-in macros evaluate to -1
   so they can still appear in a comparison.  */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

/* getrusage() */
#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

/* gettimeofday() */
#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval       struct timeval_dummy
#endif

/* times() */
#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif

/* Dummy types named by the struct_xxx macros above when the corresponding
   function doesn't exist.  They are declared unconditionally.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
---|
307 | |
---|
/* Which measuring methods are in use, as selected by speed_time_init().  */
static int  use_cycles;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One starting value per method.
   NOTE(review): presumably recorded by speed_starttime(), which is not
   visible in this part of the file -- confirm.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* cycles_limit is set by speed_time_init() when a cycle counter is used,
   and only matters if a supplementary method is chosen too.  The
   xxx_unittime variables hold the resolution of each method.  */
static double  cycles_limit = 1e100;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
---|
335 | |
---|
336 | |
---|
/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".

   T is scaled to nanoseconds, microseconds, milliseconds or left in
   seconds, and printed with about 4 significant figures.

   The return value points to a static buffer which is overwritten by the
   next call.

   A scaled value of magnitude 1e15 or more is printed in exponent form.
   "%f" prints every integer digit, so an unexpectedly huge value would
   otherwise run past the end of the buffer.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    t *= 1e9, unit = "ns";
  else if (t < 1e-3)
    t *= 1e6, unit = "us";
  else if (t < 1.0)
    t *= 1e3, unit = "ms";
  else
    unit = "s";

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  /* Below the threshold "%f" gives at most a sign, 15 integer digits, a
     point and 4 decimals, which fits buf easily.  */
  if (t >= 1e15 || t <= -1e15)
    sprintf (buf, "%.3e%s", t, unit);
  else
    sprintf (buf, "%.*f%s", prec, t, unit);
  return buf;
}
---|
370 | |
---|
371 | |
---|
372 | static jmp_buf cycles_works_buf; |
---|
373 | |
---|
374 | static RETSIGTYPE |
---|
375 | cycles_works_handler (int sig) |
---|
376 | { |
---|
377 | longjmp (cycles_works_buf, 1); |
---|
378 | } |
---|
379 | |
---|
380 | int |
---|
381 | cycles_works_p (void) |
---|
382 | { |
---|
383 | static int result = -1; |
---|
384 | RETSIGTYPE (*old_handler) _PROTO ((int)); |
---|
385 | unsigned cycles[2]; |
---|
386 | |
---|
387 | /* suppress a warning about cycles[] unused */ |
---|
388 | cycles[0] = 0; |
---|
389 | |
---|
390 | if (result != -1) |
---|
391 | goto done; |
---|
392 | |
---|
393 | #ifdef SIGILL |
---|
394 | old_handler = signal (SIGILL, cycles_works_handler); |
---|
395 | if (old_handler == SIG_ERR) |
---|
396 | { |
---|
397 | if (speed_option_verbose) |
---|
398 | printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n"); |
---|
399 | goto yes; |
---|
400 | } |
---|
401 | if (setjmp (cycles_works_buf)) |
---|
402 | { |
---|
403 | if (speed_option_verbose) |
---|
404 | printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n"); |
---|
405 | result = 0; |
---|
406 | goto done; |
---|
407 | } |
---|
408 | speed_cyclecounter (cycles); |
---|
409 | signal (SIGILL, old_handler); |
---|
410 | if (speed_option_verbose) |
---|
411 | printf ("cycles_works_p(): speed_cyclecounter() works\n"); |
---|
412 | #else |
---|
413 | |
---|
414 | if (speed_option_verbose) |
---|
415 | printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n"); |
---|
416 | #endif |
---|
417 | |
---|
418 | yes: |
---|
419 | result = 1; |
---|
420 | |
---|
421 | done: |
---|
422 | return result; |
---|
423 | } |
---|
424 | |
---|
425 | |
---|
/* Clock ticks per second.  sysconf(_SC_CLK_TCK) is preferred when
   available, with the CLK_TCK constant as the fallback.  The value is
   looked up on the first call and remembered after that.  If neither
   source gives an answer the program is aborted.  */
long
clk_tck (void)
{
  static long  ticks = -1L;

  if (ticks == -1L)
    {
#if HAVE_SYSCONF
      ticks = sysconf (_SC_CLK_TCK);
      if (ticks != -1L)
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
          return ticks;
        }

      fprintf (stderr,
               "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      ticks = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks;
}
---|
458 | |
---|
459 | |
---|
/* If two times can be observed less than half a clock tick apart, then
   assume "get" is microsecond accurate.

   Two times only 1 microsecond apart are not believed, since some kernels
   take it upon themselves to ensure gettimeofday doesn't return the same
   value twice, for the benefit of applications using it for a timestamp.
   This is obviously very stupid given the speed of CPUs these days.

   Making "reps" calls to noop_1() is designed to waste some CPU, with a
   view to getting measurements 2 microseconds (or more) apart.  "reps" is
   increased progressively until such a period is seen.

   The outer loop "attempts" are just to allow for any random nonsense or
   system load upsetting the measurements (ie. making two successive calls
   to "get" come out as a longer interval than normal).

   Macro parameters:

     name   string naming the function under test, used in the messages
     type   type of the variables holding a start and an end reading
     get    get(var) stores the current time in var
     sec    sec(var) is the seconds part of a reading
     usec   usec(var) is the microseconds part of a reading

   The expansion is a complete braced block which ends with "return
   result", so it's meant to be the entire body of a function returning
   int.  The answer is kept in a static variable, so the probing is only
   done on the first call of each such function.

   Bugs:

   The assumption that any interval less than a half tick implies
   microsecond resolution is obviously fairly rash, the true resolution
   could be anything between a microsecond and that half tick.  Perhaps
   something special would have to be done on a system where this is the
   case, since there's no obvious reliable way to detect it
   automatically.  */

#define MICROSECONDS_P(name, type, get, sec, usec) \
  { \
    static int  result = -1; \
    type      st, et; \
    long      dt, half_tick; \
    unsigned  attempt, reps, i, j; \
 \
    if (result != -1) \
      return result; \
 \
    result = 0; \
    half_tick = (1000000L / clk_tck ()) / 2; \
 \
    for (attempt = 0; attempt < 5; attempt++) \
      { \
        reps = 0; \
        for (;;) \
          { \
            get (st); \
            for (i = 0; i < reps; i++) \
              for (j = 0; j < 100; j++) \
                noop_1 (CNST_LIMB(0)); \
            get (et); \
 \
            dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st); \
 \
            if (speed_option_verbose >= 2) \
              printf ("%s attempt=%u, reps=%u, dt=%ld\n", \
                      name, attempt, reps, dt); \
 \
            if (dt >= 2) \
              break; \
 \
            reps = (reps == 0 ? 1 : 2*reps); \
            if (reps == 0) \
              break;  /* uint overflow, not normal */ \
          } \
 \
        if (dt < half_tick) \
          { \
            result = 1; \
            break; \
          } \
      } \
 \
    if (speed_option_verbose) \
      { \
        if (result) \
          printf ("%s is microsecond accurate\n", name); \
        else \
          printf ("%s is only %s clock tick accurate\n", \
                  name, unittime_string (1.0/clk_tck())); \
      } \
    return result; \
  }
---|
540 | |
---|
541 | |
---|
542 | int |
---|
543 | gettimeofday_microseconds_p (void) |
---|
544 | { |
---|
545 | #define call_gettimeofday(t) gettimeofday (&(t), NULL) |
---|
546 | #define timeval_tv_sec(t) ((t).tv_sec) |
---|
547 | #define timeval_tv_usec(t) ((t).tv_usec) |
---|
548 | MICROSECONDS_P ("gettimeofday", struct_timeval, |
---|
549 | call_gettimeofday, timeval_tv_sec, timeval_tv_usec); |
---|
550 | } |
---|
551 | |
---|
552 | int |
---|
553 | getrusage_microseconds_p (void) |
---|
554 | { |
---|
555 | #define call_getrusage(t) getrusage (0, &(t)) |
---|
556 | #define rusage_tv_sec(t) ((t).ru_utime.tv_sec) |
---|
557 | #define rusage_tv_usec(t) ((t).ru_utime.tv_usec) |
---|
558 | MICROSECONDS_P ("getrusage", struct_rusage, |
---|
559 | call_getrusage, rusage_tv_sec, rusage_tv_usec); |
---|
560 | } |
---|
561 | |
---|
562 | |
---|
/* Clock ID to give to clock_gettime() and clock_getres().

   CLOCK_PROCESS_CPUTIME_ID is the first choice; it looks like it's going to
   be in a future version of glibc (some time post 2.2).

   CLOCK_VIRTUAL is the second choice.  It's process time, available in BSD
   systems (though sometimes defined, but returning -1 for an error).

   HAVE_CGT_ID is 1 or 0 according to whether an ID was found.  When none
   was found, CGT_ID is an ASSERT_FAIL with a dummy value of -1.  */

#if defined (CLOCK_PROCESS_CPUTIME_ID)
#  define CGT_ID  CLOCK_PROCESS_CPUTIME_ID
#elif defined (CLOCK_VIRTUAL)
#  define CGT_ID  CLOCK_VIRTUAL
#endif

#if defined (CGT_ID)
#  define HAVE_CGT_ID  1
#else
#  define HAVE_CGT_ID  0
#  define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif
---|
582 | |
---|
583 | int |
---|
584 | cgt_works_p (void) |
---|
585 | { |
---|
586 | static int result = -1; |
---|
587 | struct_timespec unit; |
---|
588 | |
---|
589 | if (! have_cgt) |
---|
590 | return 0; |
---|
591 | |
---|
592 | if (! HAVE_CGT_ID) |
---|
593 | { |
---|
594 | if (speed_option_verbose) |
---|
595 | printf ("clock_gettime don't know what ID to use\n"); |
---|
596 | result = 0; |
---|
597 | return result; |
---|
598 | } |
---|
599 | |
---|
600 | if (result != -1) |
---|
601 | return result; |
---|
602 | |
---|
603 | /* trial run to see if it works */ |
---|
604 | if (clock_gettime (CGT_ID, &unit) != 0) |
---|
605 | { |
---|
606 | if (speed_option_verbose) |
---|
607 | printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); |
---|
608 | result = 0; |
---|
609 | return result; |
---|
610 | } |
---|
611 | |
---|
612 | /* get the resolution */ |
---|
613 | if (clock_getres (CGT_ID, &unit) != 0) |
---|
614 | { |
---|
615 | if (speed_option_verbose) |
---|
616 | printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno)); |
---|
617 | result = 0; |
---|
618 | return result; |
---|
619 | } |
---|
620 | |
---|
621 | cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9; |
---|
622 | printf ("clock_gettime is %s accurate\n", |
---|
623 | unittime_string (cgt_unittime)); |
---|
624 | result = 1; |
---|
625 | return result; |
---|
626 | } |
---|
627 | |
---|
628 | |
---|
/* Address of the least significant 32 bits of the SGI hardware cycle
   counter, once sgi_works_p() has mapped it.  */
volatile unsigned  *sgi_addr;

/* Return 1 if the syssgi() cycle counter is available and has been mapped
   into memory at sgi_addr, or 0 if not.  On success sgi_unittime is the
   counter period in seconds.

   The setup is attempted once and the outcome remembered.  Without both
   syssgi() and mmap() the answer is always 0.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          /* size is in bits, so say 32 to match what's actually assumed */
          printf (" will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  if (virtpage == (void *) -1)
    {
      /* report before close(), which might change errno */
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (errno));
      close (fd);
      result = 0;
      return result;
    }

  /* The descriptor isn't needed once the page is mapped, the mapping
     remains in place after it's closed.  */
  close (fd);

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
---|
717 | |
---|
718 | |
---|
/* Give VAR the value N, but only if VAR is currently zero, ie. hasn't
   already been set to something.  */
#define DEFAULT(var,n)  \
  do {                  \
    if ((var) == 0)     \
      (var) = (n);      \
  } while (0)
---|
724 | |
---|
725 | void |
---|
726 | speed_time_init (void) |
---|
727 | { |
---|
728 | double supplement_unittime = 0.0; |
---|
729 | |
---|
730 | static int speed_time_initialized = 0; |
---|
731 | if (speed_time_initialized) |
---|
732 | return; |
---|
733 | speed_time_initialized = 1; |
---|
734 | |
---|
735 | speed_cycletime_init (); |
---|
736 | |
---|
737 | if (have_cycles && cycles_works_p ()) |
---|
738 | { |
---|
739 | use_cycles = 1; |
---|
740 | DEFAULT (speed_cycletime, 1.0); |
---|
741 | speed_unittime = speed_cycletime; |
---|
742 | DEFAULT (speed_precision, 10000); |
---|
743 | strcpy (speed_time_string, "CPU cycle counter"); |
---|
744 | |
---|
745 | /* only used if a supplementary method is chosen below */ |
---|
746 | cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0 |
---|
747 | * speed_cycletime; |
---|
748 | |
---|
749 | if (have_grus && getrusage_microseconds_p()) |
---|
750 | { |
---|
751 | /* this is a good combination */ |
---|
752 | use_grus = 1; |
---|
753 | supplement_unittime = grus_unittime = 1.0e-6; |
---|
754 | strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()"); |
---|
755 | } |
---|
756 | else if (have_cycles == 1) |
---|
757 | { |
---|
758 | /* When speed_cyclecounter has a limited range, look for something |
---|
759 | to supplement it. */ |
---|
760 | if (have_gtod && gettimeofday_microseconds_p()) |
---|
761 | { |
---|
762 | use_gtod = 1; |
---|
763 | supplement_unittime = gtod_unittime = 1.0e-6; |
---|
764 | strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()"); |
---|
765 | } |
---|
766 | else if (have_grus) |
---|
767 | { |
---|
768 | use_grus = 1; |
---|
769 | supplement_unittime = grus_unittime = 1.0 / (double) clk_tck (); |
---|
770 | sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime)); |
---|
771 | } |
---|
772 | else if (have_times) |
---|
773 | { |
---|
774 | use_times = 1; |
---|
775 | supplement_unittime = times_unittime = 1.0 / (double) clk_tck (); |
---|
776 | sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime)); |
---|
777 | } |
---|
778 | else if (have_gtod) |
---|
779 | { |
---|
780 | use_gtod = 1; |
---|
781 | supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck (); |
---|
782 | sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime)); |
---|
783 | } |
---|
784 | else |
---|
785 | { |
---|
786 | fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n"); |
---|
787 | fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n"); |
---|
788 | } |
---|
789 | } |
---|
790 | |
---|
791 | if (use_grus || use_times || use_gtod) |
---|
792 | { |
---|
793 | /* must know cycle period to compare cycles to other measuring |
---|
794 | (via cycles_limit) */ |
---|
795 | speed_cycletime_need_seconds (); |
---|
796 | |
---|
797 | if (speed_precision * supplement_unittime > cycles_limit) |
---|
798 | { |
---|
799 | fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n"); |
---|
800 | fprintf (stderr, " cycle counter and limited precision supplemental method\n"); |
---|
801 | fprintf (stderr, " (%s)\n", speed_time_string); |
---|
802 | } |
---|
803 | } |
---|
804 | } |
---|
805 | else if (have_stck) |
---|
806 | { |
---|
807 | strcpy (speed_time_string, "STCK timestamp"); |
---|
808 | /* stck is in units of 2^-12 microseconds, which is very likely higher |
---|
809 | resolution than a cpu cycle */ |
---|
810 | if (speed_cycletime == 0.0) |
---|
811 | speed_cycletime_fail |
---|
812 | ("Need to know CPU frequency for effective stck unit"); |
---|
813 | speed_unittime = MAX (speed_cycletime, STCK_PERIOD); |
---|
814 | DEFAULT (speed_precision, 10000); |
---|
815 | } |
---|
816 | else if (have_sgi && sgi_works_p ()) |
---|
817 | { |
---|
818 | use_sgi = 1; |
---|
819 | DEFAULT (speed_precision, 10000); |
---|
820 | speed_unittime = sgi_unittime; |
---|
821 | sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()", |
---|
822 | unittime_string (speed_unittime)); |
---|
823 | /* supplemented with getrusage, which we assume to have 1ms resolution */ |
---|
824 | use_grus = 1; |
---|
825 | supplement_unittime = 1e-3; |
---|
826 | } |
---|
827 | else if (have_rrt) |
---|
828 | { |
---|
829 | timebasestruct_t t; |
---|
830 | use_rrt = 1; |
---|
831 | DEFAULT (speed_precision, 10000); |
---|
832 | read_real_time (&t, sizeof(t)); |
---|
833 | switch (t.flag) { |
---|
834 | case RTC_POWER: |
---|
835 | /* FIXME: What's the actual RTC resolution? */ |
---|
836 | speed_unittime = 1e-7; |
---|
837 | strcpy (speed_time_string, "read_real_time() power nanoseconds"); |
---|
838 | break; |
---|
839 | case RTC_POWER_PC: |
---|
840 | t.tb_high = 1; |
---|
841 | t.tb_low = 0; |
---|
842 | time_base_to_time (&t, sizeof(t)); |
---|
843 | speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32; |
---|
844 | sprintf (speed_time_string, "%s read_real_time() powerpc ticks", |
---|
845 | unittime_string (speed_unittime)); |
---|
846 | break; |
---|
847 | default: |
---|
848 | fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n", |
---|
849 | t.flag); |
---|
850 | abort (); |
---|
851 | } |
---|
852 | } |
---|
853 | else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6) |
---|
854 | { |
---|
855 | /* use clock_gettime if microsecond or better resolution */ |
---|
856 | choose_cgt: |
---|
857 | use_cgt = 1; |
---|
858 | speed_unittime = cgt_unittime; |
---|
859 | DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000)); |
---|
860 | strcpy (speed_time_string, "microsecond accurate getrusage()"); |
---|
861 | } |
---|
862 | else if (have_grus && getrusage_microseconds_p()) |
---|
863 | { |
---|
864 | use_grus = 1; |
---|
865 | speed_unittime = grus_unittime = 1.0e-6; |
---|
866 | DEFAULT (speed_precision, 1000); |
---|
867 | strcpy (speed_time_string, "microsecond accurate getrusage()"); |
---|
868 | } |
---|
869 | else if (have_gtod && gettimeofday_microseconds_p()) |
---|
870 | { |
---|
871 | use_gtod = 1; |
---|
872 | speed_unittime = gtod_unittime = 1.0e-6; |
---|
873 | DEFAULT (speed_precision, 1000); |
---|
874 | strcpy (speed_time_string, "microsecond accurate gettimeofday()"); |
---|
875 | } |
---|
876 | else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck()) |
---|
877 | { |
---|
878 | /* use clock_gettime if 1 tick or better resolution */ |
---|
879 | goto choose_cgt; |
---|
880 | } |
---|
881 | else if (have_times) |
---|
882 | { |
---|
883 | use_times = 1; |
---|
884 | use_tick_boundary = 1; |
---|
885 | speed_unittime = times_unittime = 1.0 / (double) clk_tck (); |
---|
886 | DEFAULT (speed_precision, 200); |
---|
887 | sprintf (speed_time_string, "%s clock tick times()", |
---|
888 | unittime_string (speed_unittime)); |
---|
889 | } |
---|
890 | else if (have_grus) |
---|
891 | { |
---|
892 | use_grus = 1; |
---|
893 | use_tick_boundary = 1; |
---|
894 | speed_unittime = grus_unittime = 1.0 / (double) clk_tck (); |
---|
895 | DEFAULT (speed_precision, 200); |
---|
896 | sprintf (speed_time_string, "%s clock tick getrusage()\n", |
---|
897 | unittime_string (speed_unittime)); |
---|
898 | } |
---|
899 | else if (have_gtod) |
---|
900 | { |
---|
901 | use_gtod = 1; |
---|
902 | use_tick_boundary = 1; |
---|
903 | speed_unittime = gtod_unittime = 1.0 / (double) clk_tck (); |
---|
904 | DEFAULT (speed_precision, 200); |
---|
905 | sprintf (speed_time_string, "%s clock tick gettimeofday()", |
---|
906 | unittime_string (speed_unittime)); |
---|
907 | } |
---|
908 | else |
---|
909 | { |
---|
910 | fprintf (stderr, "No time measuring method available\n"); |
---|
911 | fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n"); |
---|
912 | abort (); |
---|
913 | } |
---|
914 | |
---|
915 | if (speed_option_verbose) |
---|
916 | { |
---|
917 | printf ("speed_time_init: %s\n", speed_time_string); |
---|
918 | printf (" speed_precision %d\n", speed_precision); |
---|
919 | printf (" speed_unittime %.2g\n", speed_unittime); |
---|
920 | if (supplement_unittime) |
---|
921 | printf (" supplement_unittime %.2g\n", supplement_unittime); |
---|
922 | printf (" use_tick_boundary %d\n", use_tick_boundary); |
---|
923 | if (have_cycles) |
---|
924 | printf (" cycles_limit %.2g seconds\n", cycles_limit); |
---|
925 | } |
---|
926 | } |
---|
927 | |
---|
928 | |
---|
929 | |
---|
930 | /* Burn up CPU until a clock tick boundary, for greater accuracy. Set the |
---|
931 | corresponding "start_foo" appropriately too. */ |
---|
932 | |
---|
933 | void |
---|
934 | grus_tick_boundary (void) |
---|
935 | { |
---|
936 | struct_rusage prev; |
---|
937 | getrusage (0, &prev); |
---|
938 | do { |
---|
939 | getrusage (0, &start_grus); |
---|
940 | } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec); |
---|
941 | } |
---|
942 | |
---|
943 | void |
---|
944 | gtod_tick_boundary (void) |
---|
945 | { |
---|
946 | struct_timeval prev; |
---|
947 | gettimeofday (&prev, NULL); |
---|
948 | do { |
---|
949 | gettimeofday (&start_gtod, NULL); |
---|
950 | } while (start_gtod.tv_usec == prev.tv_usec); |
---|
951 | } |
---|
952 | |
---|
953 | void |
---|
954 | times_tick_boundary (void) |
---|
955 | { |
---|
956 | struct_tms prev; |
---|
957 | times (&prev); |
---|
958 | do |
---|
959 | times (&start_times); |
---|
960 | while (start_times.tms_utime == prev.tms_utime); |
---|
961 | } |
---|
962 | |
---|
963 | |
---|
964 | /* "have_" values are tested to let unused code go dead. */ |
---|
965 | |
---|
966 | void |
---|
967 | speed_starttime (void) |
---|
968 | { |
---|
969 | speed_time_init (); |
---|
970 | |
---|
971 | if (have_grus && use_grus) |
---|
972 | { |
---|
973 | if (use_tick_boundary) |
---|
974 | grus_tick_boundary (); |
---|
975 | else |
---|
976 | getrusage (0, &start_grus); |
---|
977 | } |
---|
978 | |
---|
979 | if (have_gtod && use_gtod) |
---|
980 | { |
---|
981 | if (use_tick_boundary) |
---|
982 | gtod_tick_boundary (); |
---|
983 | else |
---|
984 | gettimeofday (&start_gtod, NULL); |
---|
985 | } |
---|
986 | |
---|
987 | if (have_times && use_times) |
---|
988 | { |
---|
989 | if (use_tick_boundary) |
---|
990 | times_tick_boundary (); |
---|
991 | else |
---|
992 | times (&start_times); |
---|
993 | } |
---|
994 | |
---|
995 | if (have_cgt && use_cgt) |
---|
996 | clock_gettime (CGT_ID, &start_cgt); |
---|
997 | |
---|
998 | if (have_rrt && use_rrt) |
---|
999 | read_real_time (&start_rrt, sizeof(start_rrt)); |
---|
1000 | |
---|
1001 | if (have_sgi && use_sgi) |
---|
1002 | start_sgi = *sgi_addr; |
---|
1003 | |
---|
1004 | if (have_stck && use_stck) |
---|
1005 | STCK (start_stck); |
---|
1006 | |
---|
1007 | /* Cycles sampled last for maximum accuracy. */ |
---|
1008 | if (have_cycles && use_cycles) |
---|
1009 | speed_cyclecounter (start_cycles); |
---|
1010 | } |
---|
1011 | |
---|
1012 | |
---|
1013 | /* Calculate the difference between two cycle counter samples, as a "double" |
---|
1014 | counter of cycles. |
---|
1015 | |
---|
1016 | The start and end values are allowed to cancel in integers in case the |
---|
1017 | counter values are bigger than the 53 bits that normally fit in a double. |
---|
1018 | |
---|
1019 | This works even if speed_cyclecounter() puts a value bigger than 32-bits |
---|
1020 | in the low word (the high word always gets a 2**32 multiplier though). */ |
---|
1021 | |
---|
1022 | double |
---|
1023 | speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2]) |
---|
1024 | { |
---|
1025 | unsigned d; |
---|
1026 | double t; |
---|
1027 | |
---|
1028 | if (have_cycles == 1) |
---|
1029 | { |
---|
1030 | t = (end[0] - start[0]); |
---|
1031 | } |
---|
1032 | else |
---|
1033 | { |
---|
1034 | d = end[0] - start[0]; |
---|
1035 | t = d - (d > end[0] ? M_2POWU : 0.0); |
---|
1036 | t += (end[1] - start[1]) * M_2POW32; |
---|
1037 | } |
---|
1038 | return t; |
---|
1039 | } |
---|
1040 | |
---|
1041 | |
---|
/* Calculate the difference between "start" and "end" using fields "sec" and
   "psec", where each "psec" is a "punit" of a second.

   The macro expands to a complete brace block which returns the result, so
   it is meant to be the whole body of a function returning "double" that has
   pointers called "end" and "start" in scope.

   The seconds parts are allowed to cancel before being combined with the
   psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
   double.

   Total time is only calculated in a "double" since an integer count of
   psecs might overflow.  2^32 microseconds is only a bit over an hour, or
   2^32 nanoseconds only about 4 seconds.

   The casts to "long" are for the benefit of timebasestruct_t, where the
   fields are only "unsigned int", but we want a signed difference.

   "punit" is parenthesized in the expansion so that an expression argument
   is multiplied as a whole.  "sec" and "psec" are member names and so
   cannot be parenthesized.  */

#define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
  {                                                             \
    long  sec_diff, psec_diff;                                  \
    sec_diff = (long) end->sec - (long) start->sec;             \
    psec_diff = (long) end->psec - (long) start->psec;          \
    return (double) sec_diff + (punit) * (double) psec_diff;    \
  }
1063 | |
---|
1064 | double |
---|
1065 | timeval_diff_secs (const struct_timeval *end, const struct_timeval *start) |
---|
1066 | { |
---|
1067 | DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6); |
---|
1068 | } |
---|
1069 | |
---|
1070 | double |
---|
1071 | rusage_diff_secs (const struct_rusage *end, const struct_rusage *start) |
---|
1072 | { |
---|
1073 | DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6); |
---|
1074 | } |
---|
1075 | |
---|
1076 | double |
---|
1077 | timespec_diff_secs (const struct_timespec *end, const struct_timespec *start) |
---|
1078 | { |
---|
1079 | DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9); |
---|
1080 | } |
---|
1081 | |
---|
1082 | /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */ |
---|
1083 | double |
---|
1084 | timebasestruct_diff_secs (const timebasestruct_t *end, |
---|
1085 | const timebasestruct_t *start) |
---|
1086 | { |
---|
1087 | DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9); |
---|
1088 | } |
---|
1089 | |
---|
1090 | |
---|
1091 | double |
---|
1092 | speed_endtime (void) |
---|
1093 | { |
---|
1094 | #define END_USE(name,value) \ |
---|
1095 | do { \ |
---|
1096 | if (speed_option_verbose >= 3) \ |
---|
1097 | printf ("speed_endtime(): used %s\n", name); \ |
---|
1098 | result = value; \ |
---|
1099 | goto done; \ |
---|
1100 | } while (0) |
---|
1101 | |
---|
1102 | #define END_ENOUGH(name,value) \ |
---|
1103 | do { \ |
---|
1104 | if (speed_option_verbose >= 3) \ |
---|
1105 | printf ("speed_endtime(): %s gives enough precision\n", name); \ |
---|
1106 | result = value; \ |
---|
1107 | goto done; \ |
---|
1108 | } while (0) |
---|
1109 | |
---|
1110 | #define END_EXCEED(name,value) \ |
---|
1111 | do { \ |
---|
1112 | if (speed_option_verbose >= 3) \ |
---|
1113 | printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \ |
---|
1114 | name); \ |
---|
1115 | result = value; \ |
---|
1116 | goto done; \ |
---|
1117 | } while (0) |
---|
1118 | |
---|
1119 | unsigned end_cycles[2]; |
---|
1120 | stck_t end_stck; |
---|
1121 | unsigned end_sgi; |
---|
1122 | timebasestruct_t end_rrt; |
---|
1123 | struct_timespec end_cgt; |
---|
1124 | struct_timeval end_gtod; |
---|
1125 | struct_rusage end_grus; |
---|
1126 | struct_tms end_times; |
---|
1127 | double t_gtod, t_grus, t_times, t_cgt; |
---|
1128 | double t_rrt, t_sgi, t_stck, t_cycles; |
---|
1129 | double result; |
---|
1130 | |
---|
1131 | /* Cycles sampled first for maximum accuracy. |
---|
1132 | "have_" values tested to let unused code go dead. */ |
---|
1133 | |
---|
1134 | if (have_cycles && use_cycles) speed_cyclecounter (end_cycles); |
---|
1135 | if (have_stck && use_stck) STCK (end_stck); |
---|
1136 | if (have_sgi && use_sgi) end_sgi = *sgi_addr; |
---|
1137 | if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt)); |
---|
1138 | if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt); |
---|
1139 | if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL); |
---|
1140 | if (have_grus && use_grus) getrusage (0, &end_grus); |
---|
1141 | if (have_times && use_times) times (&end_times); |
---|
1142 | |
---|
1143 | result = -1.0; |
---|
1144 | |
---|
1145 | if (speed_option_verbose >= 4) |
---|
1146 | { |
---|
1147 | printf ("speed_endtime():\n"); |
---|
1148 | if (use_cycles) |
---|
1149 | printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n", |
---|
1150 | start_cycles[1], start_cycles[0], |
---|
1151 | end_cycles[1], end_cycles[0]); |
---|
1152 | |
---|
1153 | if (use_stck) |
---|
1154 | printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck); |
---|
1155 | |
---|
1156 | if (use_sgi) |
---|
1157 | printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi); |
---|
1158 | |
---|
1159 | if (use_rrt) |
---|
1160 | printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n", |
---|
1161 | start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low, |
---|
1162 | end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low); |
---|
1163 | |
---|
1164 | if (use_cgt) |
---|
1165 | printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n", |
---|
1166 | start_cgt.tv_sec, start_cgt.tv_nsec, |
---|
1167 | end_cgt.tv_sec, end_cgt.tv_nsec); |
---|
1168 | |
---|
1169 | if (use_gtod) |
---|
1170 | printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n", |
---|
1171 | start_gtod.tv_sec, start_gtod.tv_usec, |
---|
1172 | end_gtod.tv_sec, end_gtod.tv_usec); |
---|
1173 | |
---|
1174 | if (use_grus) |
---|
1175 | printf (" getrusage %ld.%06ld -> %ld.%06ld\n", |
---|
1176 | start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec, |
---|
1177 | end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec); |
---|
1178 | |
---|
1179 | if (use_times) |
---|
1180 | printf (" times %ld -> %ld\n", |
---|
1181 | start_times.tms_utime, end_times.tms_utime); |
---|
1182 | } |
---|
1183 | |
---|
1184 | if (use_rrt) |
---|
1185 | { |
---|
1186 | time_base_to_time (&start_rrt, sizeof(start_rrt)); |
---|
1187 | time_base_to_time (&end_rrt, sizeof(end_rrt)); |
---|
1188 | t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt); |
---|
1189 | END_USE ("read_real_time()", t_rrt); |
---|
1190 | } |
---|
1191 | |
---|
1192 | if (use_cgt) |
---|
1193 | { |
---|
1194 | t_cgt = timespec_diff_secs (&end_cgt, &start_cgt); |
---|
1195 | END_USE ("clock_gettime()", t_cgt); |
---|
1196 | } |
---|
1197 | |
---|
1198 | if (use_grus) |
---|
1199 | { |
---|
1200 | t_grus = rusage_diff_secs (&end_grus, &start_grus); |
---|
1201 | |
---|
1202 | /* Use getrusage() if the cycle counter limit would be exceeded, or if |
---|
1203 | it provides enough accuracy already. */ |
---|
1204 | if (use_cycles) |
---|
1205 | { |
---|
1206 | if (t_grus >= speed_precision*grus_unittime) |
---|
1207 | END_ENOUGH ("getrusage()", t_grus); |
---|
1208 | if (t_grus >= cycles_limit) |
---|
1209 | END_EXCEED ("getrusage()", t_grus); |
---|
1210 | } |
---|
1211 | } |
---|
1212 | |
---|
1213 | if (use_times) |
---|
1214 | { |
---|
1215 | t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime; |
---|
1216 | |
---|
1217 | /* Use times() if the cycle counter limit would be exceeded, or if |
---|
1218 | it provides enough accuracy already. */ |
---|
1219 | if (use_cycles) |
---|
1220 | { |
---|
1221 | if (t_times >= speed_precision*times_unittime) |
---|
1222 | END_ENOUGH ("times()", t_times); |
---|
1223 | if (t_times >= cycles_limit) |
---|
1224 | END_EXCEED ("times()", t_times); |
---|
1225 | } |
---|
1226 | } |
---|
1227 | |
---|
1228 | if (use_gtod) |
---|
1229 | { |
---|
1230 | t_gtod = timeval_diff_secs (&end_gtod, &start_gtod); |
---|
1231 | |
---|
1232 | /* Use gettimeofday() if it measured a value bigger than the cycle |
---|
1233 | counter can handle. */ |
---|
1234 | if (use_cycles) |
---|
1235 | { |
---|
1236 | if (t_gtod >= cycles_limit) |
---|
1237 | END_EXCEED ("gettimeofday()", t_gtod); |
---|
1238 | } |
---|
1239 | } |
---|
1240 | |
---|
1241 | if (use_stck) |
---|
1242 | { |
---|
1243 | t_stck = (end_stck - start_stck) * STCK_PERIOD; |
---|
1244 | END_USE ("stck", t_stck); |
---|
1245 | } |
---|
1246 | |
---|
1247 | if (use_sgi) |
---|
1248 | { |
---|
1249 | t_sgi = (end_sgi - start_sgi) * sgi_unittime; |
---|
1250 | END_USE ("SGI hardware counter", t_sgi); |
---|
1251 | } |
---|
1252 | |
---|
1253 | if (use_cycles) |
---|
1254 | { |
---|
1255 | t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles) |
---|
1256 | * speed_cycletime; |
---|
1257 | END_USE ("cycle counter", t_cycles); |
---|
1258 | } |
---|
1259 | |
---|
1260 | if (use_grus && getrusage_microseconds_p()) |
---|
1261 | END_USE ("getrusage()", t_grus); |
---|
1262 | |
---|
1263 | if (use_gtod && gettimeofday_microseconds_p()) |
---|
1264 | END_USE ("gettimeofday()", t_gtod); |
---|
1265 | |
---|
1266 | if (use_times) END_USE ("times()", t_times); |
---|
1267 | if (use_grus) END_USE ("getrusage()", t_grus); |
---|
1268 | if (use_gtod) END_USE ("gettimeofday()", t_gtod); |
---|
1269 | |
---|
1270 | fprintf (stderr, "speed_endtime(): oops, no time method available\n"); |
---|
1271 | abort (); |
---|
1272 | |
---|
1273 | done: |
---|
1274 | if (result < 0.0) |
---|
1275 | { |
---|
1276 | fprintf (stderr, |
---|
1277 | "speed_endtime(): fatal error: negative time measured: %.9f\n", |
---|
1278 | result); |
---|
1279 | abort (); |
---|
1280 | } |
---|
1281 | return result; |
---|
1282 | } |
---|