source: trunk/third/gmp/tune/speed-ext.c @ 18191

Revision 18191, 6.8 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18190, which included commits to RCS files with non-trunk default branches.
Line 
/* An example of extending the speed program to measure routines not in GMP. */

/*
Copyright 1999, 2000 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.
*/
23
24
/* The extension here is three versions of an mpn arithmetic mean.  These
   aren't meant to be particularly useful, just examples.

   You can run something like the following to compare their speeds.

           ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2

   On RISC chips, mean_open() might be fastest if the compiler is doing a
   good job.  On the register-starved x86s, mean_calls will be fastest.


   Notes:

   SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
   by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
   files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.

   The advantage of this #include "speed.c" scheme is that there's no
   editing of a copy of that file, and new features in new versions of it
   will be immediately available.

   In a real program the routines mean_calls() etc would probably be in
   separate C or assembler source files, and just the measuring
   speed_mean_calls() etc would be here.  Linking against other libraries
   for things to measure is perfectly possible too.

   When attempting to compare two versions of the same named routine, say
   like the generic and assembler versions of mpn_add_n(), creative use of
   cc -D or #define is suggested, so one or both can be renamed and linked
   into the same program.  It'll be much easier to compare them side by side
   than with separate programs for each.

   common.c has notes on writing speed measuring routines.

   Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
   not using libtool) to get common.o and other objects needed by speed.c.  */
61
62
/* Prototypes for the measuring routines defined at the end of this file.
   speed.c expands this macro after its header files, so the names are
   declared before SPEED_EXTRA_ROUTINES refers to them.  Each declaration
   carries its own semicolon; the macro is used as-is.  */
#define SPEED_EXTRA_PROTOS \
  double speed_mean_calls (struct speed_params *s); \
  double speed_mean_open (struct speed_params *s); \
  double speed_mean_open2 (struct speed_params *s);
67
/* Extra entries for speed.c's array of available routines: each pairs the
   name given on the command line with the function that measures it.  The
   trailing comma after the last entry is deliberate, since the expansion
   is dropped into the middle of an initializer list.  */
#define SPEED_EXTRA_ROUTINES \
  { "mean_calls", speed_mean_calls }, \
  { "mean_open", speed_mean_open }, \
  { "mean_open2", speed_mean_open2 },
72
73#include "speed.c"
74
75
76/* A straightforward implementation calling mpn subroutines.
77
78   wp,size is set to (xp,size + yp,size) / 2.  The return value is the
79   remainder from the division.  The other versions are the same.  */
80
81mp_limb_t
82mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
83{
84  mp_limb_t  c, ret;
85
86  ASSERT (size >= 1);
87
88  c = mpn_add_n (wp, xp, yp, size);
89  ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
90  wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
91  return ret;
92}
93
94
95/* An open-coded version, making one pass over the data.  The right shift is
96   done as the added limbs are produced.  The addition code follows
97   mpn/generic/add_n.c. */
98
99mp_limb_t
100mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
101{
102  mp_limb_t  w, wprev, x, y, c, ret;
103  mp_size_t  i;
104
105  ASSERT (size >= 1);
106
107  x = xp[0];
108  y = yp[0];
109
110  wprev = x + y;
111  c = (wprev < x);
112  ret = (wprev & 1);
113
114#define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
115
116  for (i = 1; i < size; i++)
117    {
118      x = xp[i];
119      y = yp[i];
120
121      w = x + c;
122      c = (w < x);
123      w += y;
124      c += (w < y);
125
126      wp[i-1] = RSHIFT (w, wprev);
127      wprev = w;
128    }
129
130  wp[i-1] = RSHIFT (c, wprev);
131
132  return ret;
133}
134
135
136/* Another one-pass version, but right shifting the source limbs rather than
137   the result limbs.  There's not much chance of this being better than the
138   above, but it's an alternative at least. */
139
140mp_limb_t
141mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
142{
143  mp_limb_t  w, x, y, xnext, ynext, c, ret;
144  mp_size_t  i;
145
146  ASSERT (size >= 1);
147
148  x = xp[0];
149  y = yp[0];
150
151  /* ret is the low bit of x+y, c is the carry out of that low bit add */
152  ret = (x ^ y) & 1;
153  c   = (x & y) & 1;
154
155  for (i = 0; i < size-1; i++)
156    {
157      xnext = xp[i+1];
158      ynext = yp[i+1];
159      x = RSHIFT (xnext, x);
160      y = RSHIFT (ynext, y);
161
162      w = x + c;
163      c = (w < x);
164      w += y;
165      c += (w < y);
166      wp[i] = w;
167
168      x = xnext;
169      y = ynext;
170    }
171
172  wp[i] = (x >> 1) + (y >> 1) + c;
173
174  return ret;
175}
176
177
/* Shared body for the measuring routines, which differ only in the
   function they time.  This macro is in fact the same as
   SPEED_ROUTINE_MPN_BINARY_N.

   It expands to a complete braced function body and expects a
   "struct speed_params *s" to be in scope.  A destination of s->size limbs
   is allocated, the two sources and the destination are registered with
   the speed_operand_* calls, and then mean_fun is called s->reps times
   between speed_starttime() and speed_endtime().  The value from
   speed_endtime() is returned.  SPEED_RESTRICT_COND presumably makes the
   routine bail out for sizes below 1; see speed.h to confirm.  */

#define SPEED_ROUTINE_MEAN(mean_fun) \
  { \
    mp_ptr    wp; \
    unsigned  remaining; \
    double    elapsed; \
    TMP_DECL (marker); \
 \
    SPEED_RESTRICT_COND (s->size >= 1); \
 \
    TMP_MARK (marker); \
    wp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_wp); \
 \
    speed_operand_src (s, s->xp, s->size); \
    speed_operand_src (s, s->yp, s->size); \
    speed_operand_dst (s, wp, s->size); \
    speed_cache_fill (s); \
 \
    speed_starttime (); \
    remaining = s->reps; \
    do \
      { \
        mean_fun (wp, s->xp, s->yp, s->size); \
        remaining--; \
      } \
    while (remaining != 0); \
    elapsed = speed_endtime (); \
 \
    TMP_FREE (marker); \
    return elapsed; \
  }
209
210double
211speed_mean_calls (struct speed_params *s)
212{
213  SPEED_ROUTINE_MEAN (mean_calls);
214}
215
216double
217speed_mean_open (struct speed_params *s)
218{
219  SPEED_ROUTINE_MEAN (mean_open);
220}
221
222double
223speed_mean_open2 (struct speed_params *s)
224{
225  SPEED_ROUTINE_MEAN (mean_open2);
226}
Note: See TracBrowser for help on using the repository browser.