1 | #! /usr/bin/perl -w |
---|
2 | |
---|
3 | # Copyright 2000, 2001, 2002 Free Software Foundation, Inc. |
---|
4 | # |
---|
5 | # This file is part of the GNU MP Library. |
---|
6 | # |
---|
7 | # The GNU MP Library is free software; you can redistribute it and/or modify |
---|
8 | # it under the terms of the GNU Lesser General Public License as published by |
---|
9 | # the Free Software Foundation; either version 2.1 of the License, or (at your |
---|
10 | # option) any later version. |
---|
11 | # |
---|
12 | # The GNU MP Library is distributed in the hope that it will be useful, but |
---|
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
14 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
---|
15 | # License for more details. |
---|
16 | # |
---|
17 | # You should have received a copy of the GNU Lesser General Public License |
---|
18 | # along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
---|
19 | # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
---|
20 | # MA 02111-1307, USA. |
---|
21 | |
---|
22 | |
---|
23 | # Usage: cd $builddir/tune |
---|
24 | # perl $srcdir/tune/many.pl [-t] <files/dirs>... |
---|
25 | # |
---|
26 | # Output: speed-many.c |
---|
27 | # try-many.c |
---|
28 | # Makefile.many |
---|
29 | # |
---|
30 | # Make alternate versions of various mpn routines available for measuring |
---|
31 | # and testing. |
---|
32 | # |
---|
33 | # The $srcdir and $builddir in the invocation above just means the script |
---|
34 | # lives in the tune source directory, but should be run in the tune build |
---|
35 | # directory. When not using a separate object directory this just becomes |
---|
36 | # |
---|
37 | # cd tune |
---|
38 | # perl many.pl [-t] <files/dirs>... |
---|
39 | # |
---|
40 | # |
---|
41 | # SINGLE FILES |
---|
42 | # |
---|
43 | # Suppose $HOME/newcode/mul_1_experiment.asm is a new implementation of |
---|
44 | # mpn_mul_1, then |
---|
45 | # |
---|
46 | # cd $builddir/tune |
---|
47 | # perl $srcdir/tune/many.pl $HOME/newcode/mul_1_experiment.asm |
---|
48 | # |
---|
49 | # will produce rules and renaming so that a speed program incorporating it |
---|
50 | # can be built, |
---|
51 | # |
---|
52 | # make -f Makefile.many speed-many |
---|
53 | # |
---|
54 | # then for example it can be compared to the standard mul_1, |
---|
55 | # |
---|
56 | # ./speed-many -s 1-30 mpn_mul_1 mpn_mul_1_experiment |
---|
57 | # |
---|
58 | # An expanded try program can be used to check correctness, |
---|
59 | # |
---|
60 | # make -f Makefile.many try-many |
---|
61 | # |
---|
62 | # and run |
---|
63 | # |
---|
64 | # ./try-many mpn_mul_1_experiment |
---|
65 | # |
---|
66 | # Files can be ".c", ".S" or ".asm". ".s" files can't be used because they |
---|
67 | # don't get any preprocessing so there's no way to do renaming of their |
---|
68 | # functions. |
---|
69 | # |
---|
70 | # |
---|
71 | # WHOLE DIRECTORIES |
---|
72 | # |
---|
73 | # If a directory is given, then all files in it will be made available. |
---|
74 | # For example, |
---|
75 | # |
---|
76 | # cd $builddir/tune |
---|
77 | # perl $srcdir/tune/many.pl $HOME/newcode |
---|
78 | # |
---|
79 | # Each file should have a suffix, like "_experiment" above. |
---|
80 | # |
---|
81 | # |
---|
82 | # MPN DIRECTORIES |
---|
83 | # |
---|
84 | # mpn directories from the GMP source tree can be included, and this is a |
---|
85 | # convenient way to compare multiple implementations suiting different chips |
---|
86 | # in a CPU family. For example the following would make all x86 routines |
---|
87 | # available, |
---|
88 | # |
---|
89 | # cd $builddir/tune |
---|
90 | # perl $srcdir/tune/many.pl `find $srcdir/mpn/x86 -type d` |
---|
91 | # |
---|
92 | # On a new x86 chip a comparison could then be made to see how existing code |
---|
93 | # runs. For example, |
---|
94 | # |
---|
95 | # make -f Makefile.many speed-many |
---|
96 | # ./speed-many -s 1-30 -c \ |
---|
97 | # mpn_add_n_x86 mpn_add_n_pentium mpn_add_n_k6 mpn_add_n_k7 |
---|
98 | # |
---|
99 | # Files in "mpn" subdirectories don't need the "_experiment" style suffix |
---|
100 | # described above, instead a suffix is constructed from the subdirectory. |
---|
101 | # For example "mpn/x86/k7/mmx/mod_1.asm" will generate a function |
---|
102 | # mpn_mod_1_k7_mmx. The rule is to take the last directory name after the |
---|
103 | # "mpn", or the last two if there's three or more. (Check the generated |
---|
104 | # speed-many.c if in doubt.) |
---|
105 | # |
---|
106 | # |
---|
107 | # GENERIC C |
---|
108 | # |
---|
109 | # The mpn/generic directory can be included too, just like any processor |
---|
110 | # specific directory. This is a good way to compare assembler and generic C |
---|
111 | # implementations. For example, |
---|
112 | # |
---|
113 | # cd $builddir/tune |
---|
114 | # perl $srcdir/tune/many.pl $srcdir/mpn/generic |
---|
115 | # |
---|
116 | # or if just a few routines are of interest, then for example |
---|
117 | # |
---|
118 | # cd $builddir/tune |
---|
119 | # perl $srcdir/tune/many.pl \ |
---|
120 | # $srcdir/mpn/generic/lshift.c \ |
---|
121 | # $srcdir/mpn/generic/mod_1.c \ |
---|
122 | # $srcdir/mpn/generic/aorsmul_1.c |
---|
123 | # |
---|
124 | # giving mpn_lshift_generic etc. |
---|
125 | # |
---|
126 | # |
---|
127 | # TESTS/DEVEL PROGRAMS |
---|
128 | # |
---|
129 | # Makefile.many also has rules to build the tests/devel programs with suitable |
---|
130 | # renaming, and with some parameters for correctness or speed. This is less |
---|
131 | # convenient than the speed and try programs, but provides an independent |
---|
132 | # check. For example, |
---|
133 | # |
---|
134 | # make -f Makefile.many tests_mul_1_experimental |
---|
135 | # ./tests_mul_1_experimental |
---|
136 | # |
---|
137 | # and for speed |
---|
138 | # |
---|
139 | # make -f Makefile.many tests_mul_1_experimental_sp |
---|
140 | # ./tests_mul_1_experimental_sp |
---|
141 | # |
---|
142 | # Not all the programs support speed measuring, in which case only the |
---|
143 | # correctness test will be useful. |
---|
144 | # |
---|
145 | # The parameters for repetitions and host clock speed are -D defines. Some |
---|
146 | # defaults are provided at the end of Makefile.many, but probably these will |
---|
147 | # want to be overridden. For example, |
---|
148 | # |
---|
149 | # rm tests_mul_1_experimental.o |
---|
150 | # make -f Makefile.many \ |
---|
151 | # CFLAGS_TESTS="-DSIZE=50 -DTIMES=1000 -DRANDOM -DCLOCK=175000000" \ |
---|
152 | # tests_mul_1_experimental |
---|
153 | # ./tests_mul_1_experimental |
---|
154 | # |
---|
155 | # |
---|
156 | # OTHER NOTES |
---|
157 | # |
---|
158 | # The mappings of file names to functions, and the macros to then use for |
---|
159 | # speed measuring etc are driven by @table below. The scheme isn't |
---|
160 | # completely general, it's only got as many variations as have been needed |
---|
161 | # so far. |
---|
162 | # |
---|
163 | # Some functions are only made available in speed-many, or others only in |
---|
164 | # try-many. An @table entry speed=>none means no speed measuring is |
---|
165 | # available, or try=>none no try program testing. These can be removed |
---|
166 | # if/when the respective programs get the necessary support. |
---|
167 | # |
---|
168 | # If a file has "1c" or "nc" carry-in entrypoints, they're renamed and made |
---|
169 | # available too. These are recognised from PROLOGUE or MULFUNC_PROLOGUE in |
---|
170 | # .S and .asm files, or from a line starting with "mpn_foo_1c" in a .c file |
---|
171 | # (possibly via a #define), and on that basis are entirely optional. This |
---|
172 | # entrypoint matching is done for the standard entrypoints too, but it would |
---|
173 | # be very unusual to have for instance a mul_1c without a mul_1. |
---|
174 | # |
---|
175 | # Some mpz files are recognized. For example an experimental copy of |
---|
176 | # mpz/powm.c could be included as powm_new.c and would be called |
---|
177 | # mpz_powm_new. So far only speed measuring is available for these. |
---|
178 | # |
---|
179 | # For the ".S" and ".asm" files, both PIC and non-PIC objects are built. |
---|
180 | # The PIC functions have a "_pic" suffix, for example "mpn_mod_1_k7_mmx_pic". |
---|
181 | # This can be ignored for routines that don't differ for PIC, or for CPUs |
---|
182 | # where everything is PIC anyway. |
---|
183 | # |
---|
184 | # K&R compilers are supported via the same ansi2knr mechanism used by |
---|
185 | # automake, though it's hard to believe anyone will have much interest in |
---|
186 | # measuring a compiler so old that it doesn't even have an ANSI mode. |
---|
187 | # |
---|
188 | # The "-t" option can be used to print a trace of the files found and what's |
---|
189 | # done with them. A great deal of obscure output is produced, but it can |
---|
190 | # indicate where or why some files aren't being recognised etc. For |
---|
191 | # example, |
---|
192 | # |
---|
193 | # cd $builddir/tune |
---|
194 | # perl $srcdir/tune/many.pl -t $HOME/newcode/add_n_weird.asm |
---|
195 | # |
---|
196 | # In general, when including new code, all that's really necessary is that |
---|
197 | # it will compile or assemble under the current configuration. It's fine if |
---|
198 | # some code doesn't actually run due to bugs, or to needing a newer CPU or |
---|
199 | # whatever, simply don't ask for the offending routines when invoking |
---|
200 | # speed-many or try-many, or don't try to run them on sizes they don't yet |
---|
201 | # support, or whatever. |
---|
202 | # |
---|
203 | # |
---|
204 | # CPU SPECIFICS |
---|
205 | # |
---|
206 | # x86 - All the x86 code will assemble on any system, but code for newer |
---|
207 | # chips might not run on older chips. Expect SIGILLs from new |
---|
208 | # instructions on old chips. |
---|
209 | # |
---|
210 | # A few "new" instructions, like cmov for instance, are done as macros |
---|
211 | # and will generate some equivalent plain i386 code when HAVE_HOST_CPU |
---|
212 | # in config.m4 indicates an old CPU. It won't run fast, but it does |
---|
213 | # make it possible to test correctness. |
---|
214 | # |
---|
215 | # |
---|
216 | # INTERNALS |
---|
217 | # |
---|
218 | # The nonsense involving $ENV is some hooks used during development to add |
---|
219 | # additional functions temporarily. |
---|
220 | # |
---|
221 | # |
---|
222 | # FUTURE |
---|
223 | # |
---|
224 | # Maybe the C files should be compiled pic and non-pic too. Wait until |
---|
225 | # there's a difference that might be of interest. |
---|
226 | # |
---|
227 | # Warn if a file provides no functions. |
---|
228 | # |
---|
229 | # Allow mpz and mpn files of the same name. Currently the mpn fib2_ui |
---|
230 | # matching hides the mpz version of that. Will need to check the file |
---|
231 | # contents to see which it is. Would be worth allowing an "mpz_" or "mpn_" |
---|
232 | # prefix on the filenames to have working versions of both in one directory. |
---|
233 | # |
---|
234 | # |
---|
235 | # LIMITATIONS |
---|
236 | # |
---|
237 | # Some of the command lines can become very long when a lot of files are |
---|
238 | # included. If this is a problem on a given system the only suggestion is |
---|
239 | # to run many.pl for just those that are actually wanted at a particular |
---|
240 | # time. |
---|
241 | # |
---|
242 | # DOS 8.3 or SysV 14 char filesystems won't work, since the long filenames |
---|
243 | # generated will almost certainly fail to be unique. |
---|
244 | |
---|
245 | |
---|
246 | use strict; |
---|
247 | use File::Basename; |
---|
248 | use Getopt::Std; |
---|
249 | |
---|
250 | my %opt; |
---|
251 | getopts('t', \%opt); |
---|
252 | |
---|
253 | my @DIRECTORIES = @ARGV; |
---|
254 | if (defined $ENV{directories}) { push @DIRECTORIES, @{$ENV{directories}} } |
---|
255 | |
---|
256 | |
---|
257 | # regexp - matched against the start of the filename. If a grouping "(...)" |
---|
258 | # is present then only the first such part is used. |
---|
259 | # |
---|
260 | # mulfunc - filenames to be generated from a multi-function file. |
---|
261 | # |
---|
262 | # funs - functions provided by the file, defaulting to the filename with mpn |
---|
263 | # (or mpX). |
---|
264 | # |
---|
265 | # mpX - prefix like "mpz", defaulting to "mpn". |
---|
266 | # |
---|
267 | # ret - return value type. |
---|
268 | # |
---|
269 | # args, args_<fun> - arguments for the given function. If an args_<fun> is |
---|
270 | # set then it's used, otherwise plain args is used. "mp_limb_t |
---|
271 | # carry" is appended for carry-in variants. |
---|
272 | # |
---|
273 | # try - try.c TYPE_ to use, defaulting to TYPE_fun with the function name |
---|
274 | # in upper case. "C" is appended for carry-in variants. Can be |
---|
275 | # 'none' for no try program entry. |
---|
276 | # |
---|
277 | # speed - SPEED_ROUTINE_ to use, handled like "try". |
---|
278 | # |
---|
279 | # speed_flags - SPEED_ROUTINE_ to use, handled like "try". |
---|
280 | |
---|
281 | |
---|
282 | my @table = |
---|
283 | ( |
---|
284 | { |
---|
285 | 'regexp'=> 'add_n|sub_n', |
---|
286 | 'ret' => 'mp_limb_t', |
---|
287 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
288 | 'speed' => 'SPEED_ROUTINE_MPN_BINARY_N', |
---|
289 | 'speed_flags'=> 'FLAG_R_OPTIONAL', |
---|
290 | }, |
---|
291 | { |
---|
292 | 'regexp'=> 'aors_n', |
---|
293 | 'mulfunc'=> ['add_n','sub_n'], |
---|
294 | 'ret' => 'mp_limb_t', |
---|
295 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
296 | 'speed' => 'SPEED_ROUTINE_MPN_BINARY_N', |
---|
297 | 'speed_flags'=> 'FLAG_R_OPTIONAL', |
---|
298 | }, |
---|
299 | |
---|
300 | { |
---|
301 | 'regexp'=> 'addmul_1|submul_1', |
---|
302 | 'ret' => 'mp_limb_t', |
---|
303 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult', |
---|
304 | 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1', |
---|
305 | 'speed_flags'=> 'FLAG_R', |
---|
306 | }, |
---|
307 | { |
---|
308 | 'regexp'=> 'aorsmul_1', |
---|
309 | 'mulfunc'=> ['addmul_1','submul_1'], |
---|
310 | 'ret' => 'mp_limb_t', |
---|
311 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult', |
---|
312 | 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1', |
---|
313 | 'speed_flags'=> 'FLAG_R', |
---|
314 | }, |
---|
315 | |
---|
316 | { |
---|
317 | 'regexp'=> 'addsub_n', |
---|
318 | 'ret' => 'mp_limb_t', |
---|
319 | 'args' => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
320 | 'speed_flags'=> 'FLAG_R_OPTIONAL', |
---|
321 | }, |
---|
322 | |
---|
323 | { |
---|
324 | 'regexp'=> 'bdivmod', |
---|
325 | 'ret' => 'mp_limb_t', |
---|
326 | 'args' => 'mp_ptr qp, mp_ptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize, unsigned long int d', |
---|
327 | 'carrys'=> [''], |
---|
328 | 'try' => 'none', |
---|
329 | 'speed' => 'none', |
---|
330 | }, |
---|
331 | |
---|
332 | { |
---|
333 | 'regexp'=> 'com_n|copyi|copyd', |
---|
334 | 'ret' => 'void', |
---|
335 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size', |
---|
336 | 'speed' => 'SPEED_ROUTINE_MPN_COPY', |
---|
337 | }, |
---|
338 | |
---|
339 | { |
---|
340 | 'regexp'=> 'dive_1', |
---|
341 | 'funs' => ['divexact_1'], |
---|
342 | 'ret' => 'void', |
---|
343 | 'args' => 'mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor', |
---|
344 | 'speed_flags'=> 'FLAG_R', |
---|
345 | }, |
---|
346 | { |
---|
347 | 'regexp'=> 'diveby3', |
---|
348 | 'funs' => ['divexact_by3c'], |
---|
349 | 'ret' => 'mp_limb_t', |
---|
350 | 'args' => 'mp_ptr dst, mp_srcptr src, mp_size_t size', |
---|
351 | 'carrys'=> [''], |
---|
352 | 'speed' => 'SPEED_ROUTINE_MPN_COPY', |
---|
353 | }, |
---|
354 | |
---|
355 | # mpn_preinv_divrem_1 is an optional extra entrypoint |
---|
356 | { |
---|
357 | 'regexp'=> 'divrem_1', |
---|
358 | 'funs' => ['divrem_1', 'preinv_divrem_1'], |
---|
359 | 'ret' => 'mp_limb_t', |
---|
360 | 'args_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor', |
---|
361 | 'args_preinv_divrem_1' => 'mp_ptr rp, mp_size_t xsize, mp_srcptr sp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse, unsigned shift', |
---|
362 | 'speed_flags'=> 'FLAG_R', |
---|
363 | 'speed_suffixes' => ['f'], |
---|
364 | }, |
---|
365 | { |
---|
366 | 'regexp'=> 'pre_divrem_1', |
---|
367 | 'funs' => ['preinv_divrem_1'], |
---|
368 | 'ret' => 'mp_limb_t', |
---|
369 | 'args' => 'mp_ptr qp, mp_size_t qxn, mp_srcptr ap, mp_size_t asize, mp_limb_t divisor, mp_limb_t inverse, int shift', |
---|
370 | 'speed_flags' => 'FLAG_R', |
---|
371 | }, |
---|
372 | |
---|
373 | { |
---|
374 | 'regexp'=> 'divrem_2', |
---|
375 | 'ret' => 'mp_limb_t', |
---|
376 | 'args' => 'mp_ptr qp, mp_size_t qxn, mp_srcptr np, mp_size_t nsize, mp_srcptr dp', |
---|
377 | 'try' => 'none', |
---|
378 | }, |
---|
379 | |
---|
380 | { |
---|
381 | 'regexp'=> 'sb_divrem_mn', |
---|
382 | 'ret' => 'mp_limb_t', |
---|
383 | 'args' => 'mp_ptr qp, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize', |
---|
384 | 'speed' => 'SPEED_ROUTINE_MPN_DC_DIVREM_SB', |
---|
385 | 'try-minsize' => 3, |
---|
386 | }, |
---|
387 | { |
---|
388 | 'regexp'=> 'tdiv_qr', |
---|
389 | 'ret' => 'void', |
---|
390 | 'args' => 'mp_ptr qp, mp_size_t qxn, mp_ptr np, mp_size_t nsize, mp_srcptr dp, mp_size_t dsize', |
---|
391 | 'speed' => 'none', |
---|
392 | }, |
---|
393 | |
---|
394 | { |
---|
395 | 'regexp'=> 'get_str', |
---|
396 | 'ret' => 'size_t', |
---|
397 | 'args' => 'unsigned char *str, int base, mp_ptr mptr, mp_size_t msize', |
---|
398 | 'speed_flags' => 'FLAG_R_OPTIONAL', |
---|
399 | 'try' => 'none', |
---|
400 | }, |
---|
401 | { |
---|
402 | 'regexp'=> 'set_str', |
---|
403 | 'ret' => 'mp_size_t', |
---|
404 | 'args' => 'mp_ptr xp, const unsigned char *str, size_t str_len, int base', |
---|
405 | 'speed_flags' => 'FLAG_R_OPTIONAL', |
---|
406 | 'try' => 'none', |
---|
407 | }, |
---|
408 | |
---|
409 | { |
---|
410 | 'regexp'=> 'fac_ui', |
---|
411 | 'mpX' => 'mpz', |
---|
412 | 'ret' => 'void', |
---|
413 | 'args' => 'mpz_ptr r, unsigned long n', |
---|
414 | 'speed_flags' => 'FLAG_NODATA', |
---|
415 | 'try' => 'none', |
---|
416 | }, |
---|
417 | |
---|
418 | { |
---|
419 | 'regexp'=> 'fib2_ui', |
---|
420 | 'ret' => 'void', |
---|
421 | 'args' => 'mp_ptr fp, mp_ptr f1p, unsigned long n', |
---|
422 | 'rename'=> ['__gmp_fib_table'], |
---|
423 | 'speed_flags' => 'FLAG_NODATA', |
---|
424 | 'try' => 'none', |
---|
425 | }, |
---|
426 | { |
---|
427 | 'regexp'=> 'fib_ui', |
---|
428 | 'mpX' => 'mpz', |
---|
429 | 'ret' => 'void', |
---|
430 | 'args' => 'mpz_ptr fn, unsigned long n', |
---|
431 | 'speed_flags' => 'FLAG_NODATA', |
---|
432 | 'try' => 'none', |
---|
433 | }, |
---|
434 | { |
---|
435 | 'regexp'=> 'fib2_ui', |
---|
436 | 'mpX' => 'mpz', |
---|
437 | 'ret' => 'void', |
---|
438 | 'args' => 'mpz_ptr fn, mpz_ptr fnsub1, unsigned long n', |
---|
439 | 'speed_flags' => 'FLAG_NODATA', |
---|
440 | 'try' => 'none', |
---|
441 | }, |
---|
442 | |
---|
443 | { |
---|
444 | 'regexp'=> 'lucnum_ui', |
---|
445 | 'mpX' => 'mpz', |
---|
446 | 'ret' => 'void', |
---|
447 | 'args' => 'mpz_ptr ln, unsigned long n', |
---|
448 | 'speed_flags' => 'FLAG_NODATA', |
---|
449 | 'try' => 'none', |
---|
450 | }, |
---|
451 | { |
---|
452 | 'regexp'=> 'lucnum2_ui', |
---|
453 | 'mpX' => 'mpz', |
---|
454 | 'ret' => 'void', |
---|
455 | 'args' => 'mpz_ptr ln, mpz_ptr lnsub1, unsigned long n', |
---|
456 | 'speed_flags' => 'FLAG_NODATA', |
---|
457 | 'try' => 'none', |
---|
458 | }, |
---|
459 | |
---|
460 | { |
---|
461 | 'regexp'=> 'gcd_1', |
---|
462 | 'ret' => 'mp_limb_t', |
---|
463 | 'args' => 'mp_ptr xp, mp_size_t xsize, mp_limb_t y', |
---|
464 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
465 | 'speed_flags'=> 'FLAG_R_OPTIONAL', |
---|
466 | 'speed_suffixes' => ['N'], |
---|
467 | }, |
---|
468 | { |
---|
469 | 'regexp'=> '(gcd)(?!(_1|ext|_finda))', |
---|
470 | 'ret' => 'mp_size_t', |
---|
471 | 'args' => 'mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize', |
---|
472 | }, |
---|
473 | { |
---|
474 | 'regexp'=> 'gcd_finda', |
---|
475 | 'ret' => 'mp_limb_t', |
---|
476 | 'args' => 'mp_srcptr cp', |
---|
477 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
478 | }, |
---|
479 | |
---|
480 | |
---|
481 | { |
---|
482 | 'regexp'=> 'jacobi', |
---|
483 | 'funs' => ['jacobi', 'legendre', 'kronecker'], |
---|
484 | 'mpX' => 'mpz', |
---|
485 | 'ret' => 'int', |
---|
486 | 'args' => 'mpz_srcptr a, mpz_srcptr b', |
---|
487 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
488 | 'try-legendre' => 'TYPE_MPZ_JACOBI', |
---|
489 | }, |
---|
490 | { |
---|
491 | 'regexp'=> 'jacbase', |
---|
492 | 'funs' => ['jacobi_base'], |
---|
493 | 'ret' => 'mp_limb_t', |
---|
494 | 'args' => 'mp_limb_t a, mp_limb_t b, int bit1', |
---|
495 | 'attrib'=> 'ATTRIBUTE_CONST', |
---|
496 | 'speed' => 'SPEED_ROUTINE_MPN_JACBASE', |
---|
497 | 'try' => 'none', |
---|
498 | }, |
---|
499 | |
---|
500 | { |
---|
501 | 'regexp'=> 'logops_n', |
---|
502 | 'mulfunc'=> ['and_n','andn_n','nand_n','ior_n','iorn_n','nior_n','xor_n','xnor_n'], |
---|
503 | 'ret' => 'void', |
---|
504 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
505 | 'speed' => 'SPEED_ROUTINE_MPN_BINARY_N', |
---|
506 | }, |
---|
507 | |
---|
508 | { |
---|
509 | 'regexp'=> '[lr]shift', |
---|
510 | 'ret' => 'mp_limb_t', |
---|
511 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, unsigned shift', |
---|
512 | 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1', |
---|
513 | 'speed_flags'=> 'FLAG_R', |
---|
514 | }, |
---|
515 | |
---|
516 | # mpn_preinv_mod_1 is an optional extra entrypoint |
---|
517 | { |
---|
518 | 'regexp'=> '(mod_1)(?!_rs)', |
---|
519 | 'funs' => ['mod_1','preinv_mod_1'], |
---|
520 | 'ret' => 'mp_limb_t', |
---|
521 | 'args_mod_1' => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor', |
---|
522 | 'args_preinv_mod_1'=> 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse', |
---|
523 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
524 | 'speed_flags'=> 'FLAG_R', |
---|
525 | }, |
---|
526 | { |
---|
527 | 'regexp'=> 'pre_mod_1', |
---|
528 | 'funs' => ['preinv_mod_1'], |
---|
529 | 'ret' => 'mp_limb_t', |
---|
530 | 'args' => 'mp_srcptr xp, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse', |
---|
531 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
532 | 'speed_flags'=> 'FLAG_R', |
---|
533 | }, |
---|
534 | { |
---|
535 | 'regexp'=> 'mod_34lsub1', |
---|
536 | 'ret' => 'mp_limb_t', |
---|
537 | 'args' => 'mp_srcptr src, mp_size_t len', |
---|
538 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
539 | }, |
---|
540 | { |
---|
541 | 'regexp'=> 'invert_limb', |
---|
542 | 'ret' => 'mp_limb_t', |
---|
543 | 'args' => 'mp_limb_t divisor', |
---|
544 | 'attrib'=> 'ATTRIBUTE_CONST', |
---|
545 | 'speed_flags'=> 'FLAG_R_OPTIONAL', |
---|
546 | 'try' => 'none', |
---|
547 | }, |
---|
548 | |
---|
549 | { |
---|
550 | # not for use with hppa reversed argument versions of mpn_umul_ppmm |
---|
551 | 'regexp'=> 'udiv', |
---|
552 | 'funs' => ['udiv_qrnnd'], |
---|
553 | 'ret' => 'mp_limb_t', |
---|
554 | 'args' => 'mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d', |
---|
555 | 'speed' => 'none', |
---|
556 | 'try-minsize' => 2, |
---|
557 | }, |
---|
558 | |
---|
559 | { |
---|
560 | 'regexp'=> 'mode1o', |
---|
561 | 'funs' => ['modexact_1_odd'], |
---|
562 | 'ret' => 'mp_limb_t', |
---|
563 | 'args' => 'mp_srcptr src, mp_size_t size, mp_limb_t divisor', |
---|
564 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
565 | 'speed_flags'=> 'FLAG_R', |
---|
566 | }, |
---|
567 | { |
---|
568 | 'regexp'=> 'modlinv', |
---|
569 | 'funs' => ['modlimb_invert'], |
---|
570 | 'ret' => 'mp_limb_t', |
---|
571 | 'args' => 'mp_limb_t v', |
---|
572 | 'attrib'=> 'ATTRIBUTE_CONST', |
---|
573 | 'carrys'=> [''], |
---|
574 | 'try' => 'none', |
---|
575 | }, |
---|
576 | |
---|
577 | { |
---|
578 | 'regexp'=> 'mul_1', |
---|
579 | 'ret' => 'mp_limb_t', |
---|
580 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_limb_t mult', |
---|
581 | 'speed' => 'SPEED_ROUTINE_MPN_UNARY_1', |
---|
582 | 'speed_flags'=> 'FLAG_R', |
---|
583 | }, |
---|
584 | { |
---|
585 | 'regexp'=> 'mul_2', |
---|
586 | 'ret' => 'mp_limb_t', |
---|
587 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size, mp_srcptr mult', |
---|
588 | }, |
---|
589 | |
---|
590 | { |
---|
591 | 'regexp'=> 'mul_basecase', |
---|
592 | 'ret' => 'void', |
---|
593 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize', |
---|
594 | 'speed_flags' => 'FLAG_R_OPTIONAL | FLAG_RSIZE', |
---|
595 | }, |
---|
596 | { |
---|
597 | 'regexp'=> '(mul_n)[_.]', |
---|
598 | 'ret' => 'void', |
---|
599 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
600 | 'rename'=> ['kara_mul_n','kara_sqr_n','toom3_mul_n','toom3_sqr_n'], |
---|
601 | }, |
---|
602 | { |
---|
603 | # not for use with hppa reversed argument versions of mpn_umul_ppmm |
---|
604 | 'regexp'=> 'umul', |
---|
605 | 'funs' => ['umul_ppmm'], |
---|
606 | 'ret' => 'mp_limb_t', |
---|
607 | 'args' => 'mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2', |
---|
608 | 'speed' => 'none', |
---|
609 | 'try-minsize' => 3, |
---|
610 | }, |
---|
611 | |
---|
612 | |
---|
613 | { |
---|
614 | 'regexp'=> 'popham', |
---|
615 | 'mulfunc'=> ['popcount','hamdist'], |
---|
616 | 'ret' => 'unsigned long', |
---|
617 | 'args_popcount'=> 'mp_srcptr xp, mp_size_t size', |
---|
618 | 'args_hamdist' => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
619 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
620 | }, |
---|
621 | { |
---|
622 | 'regexp'=> 'popcount', |
---|
623 | 'ret' => 'unsigned long', |
---|
624 | 'args' => 'mp_srcptr xp, mp_size_t size', |
---|
625 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
626 | }, |
---|
627 | { |
---|
628 | 'regexp'=> 'hamdist', |
---|
629 | 'ret' => 'unsigned long', |
---|
630 | 'args' => 'mp_srcptr xp, mp_srcptr yp, mp_size_t size', |
---|
631 | 'attrib'=> '__GMP_ATTRIBUTE_PURE', |
---|
632 | # extra renaming to support sharing a data table with mpn_popcount |
---|
633 | 'rename'=> ['popcount'], |
---|
634 | }, |
---|
635 | |
---|
636 | { |
---|
637 | 'regexp'=> 'sqr_basecase', |
---|
638 | 'ret' => 'void', |
---|
639 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size', |
---|
640 | 'speed' => 'SPEED_ROUTINE_MPN_SQR', |
---|
641 | 'try' => 'TYPE_SQR', |
---|
642 | }, |
---|
643 | { |
---|
644 | 'regexp'=> 'sqr_diagonal', |
---|
645 | 'ret' => 'void', |
---|
646 | 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size', |
---|
647 | 'try' => 'none', |
---|
648 | }, |
---|
649 | |
---|
650 | { |
---|
651 | 'regexp'=> 'sqrtrem', |
---|
652 | 'ret' => 'mp_size_t', |
---|
653 | 'args' => 'mp_ptr root, mp_ptr rem, mp_srcptr src, mp_size_t size', |
---|
654 | 'try' => 'none', |
---|
655 | }, |
---|
656 | |
---|
657 | { |
---|
658 | 'regexp'=> 'cntlz', |
---|
659 | 'funs' => ['count_leading_zeros'], |
---|
660 | 'ret' => 'unsigned', |
---|
661 | 'args' => 'mp_limb_t', |
---|
662 | 'attrib'=> 'ATTRIBUTE_CONST', |
---|
663 | 'macro-before' => "#undef COUNT_LEADING_ZEROS_0", |
---|
664 | 'macro-speed' => |
---|
665 | '#ifdef COUNT_LEADING_ZEROS_0 |
---|
666 | #define COUNT_LEADING_ZEROS_0_ALLOWED 1 |
---|
667 | #else |
---|
668 | #define COUNT_LEADING_ZEROS_0_ALLOWED 0 |
---|
669 | #endif |
---|
670 | SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED); |
---|
671 | $fun (c, n); |
---|
672 | SPEED_ROUTINE_COUNT_ZEROS_B ()', |
---|
673 | 'speed_flags'=> 'FLAG_R_OPTIONAL', |
---|
674 | 'try' => 'none', |
---|
675 | }, |
---|
676 | { |
---|
677 | 'regexp'=> 'cnttz', |
---|
678 | 'funs' => ['count_trailing_zeros'], |
---|
679 | 'ret' => 'unsigned', |
---|
680 | 'args' => 'mp_limb_t', |
---|
681 | 'attrib'=> 'ATTRIBUTE_CONST', |
---|
682 | 'macro-speed' => ' |
---|
683 | SPEED_ROUTINE_COUNT_ZEROS_A (0, 0); |
---|
684 | $fun (c, n); |
---|
685 | SPEED_ROUTINE_COUNT_ZEROS_B ()', |
---|
686 | 'speed_flags' => 'FLAG_R_OPTIONAL', |
---|
687 | 'try' => 'none', |
---|
688 | }, |
---|
689 | |
---|
690 | { |
---|
691 | 'regexp'=> 'zero', |
---|
692 | 'ret' => 'void', |
---|
693 | 'args' => 'mp_ptr ptr, mp_size_t size', |
---|
694 | }, |
---|
695 | |
---|
696 | { |
---|
697 | 'regexp'=> '(powm)(?!_ui)', |
---|
698 | 'mpX' => 'mpz', |
---|
699 | 'ret' => 'void', |
---|
700 | 'args' => 'mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m', |
---|
701 | 'try' => 'none', |
---|
702 | }, |
---|
703 | { |
---|
704 | 'regexp'=> 'powm_ui', |
---|
705 | 'mpX' => 'mpz', |
---|
706 | 'ret' => 'void', |
---|
707 | 'args' => 'mpz_ptr r, mpz_srcptr b, unsigned long e, mpz_srcptr m', |
---|
708 | 'try' => 'none', |
---|
709 | }, |
---|
710 | |
---|
711 | # special for use during development |
---|
712 | { |
---|
713 | 'regexp'=> 'back', |
---|
714 | 'funs' => ['back_to_back'], |
---|
715 | 'ret' => 'void', |
---|
716 | 'args' => 'void', |
---|
717 | 'pic' => 'no', |
---|
718 | 'try' => 'none', |
---|
719 | 'speed_flags'=> 'FLAG_NODATA', |
---|
720 | }, |
---|
721 | ); |
---|
722 | |
---|
723 | if (defined $ENV{table2}) { |
---|
724 | my @newtable = @{$ENV{table2}}; |
---|
725 | push @newtable, @table; |
---|
726 | @table = @newtable; |
---|
727 | } |
---|
728 | |
---|
729 | |
---|
730 | my %pictable = |
---|
731 | ( |
---|
732 | 'yes' => { |
---|
733 | 'suffix' => '_pic', |
---|
734 | 'asmflags'=> '$(ASMFLAGS_PIC)', |
---|
735 | 'cflags' => '$(CFLAGS_PIC)', |
---|
736 | }, |
---|
737 | 'no' => { |
---|
738 | 'suffix' => '', |
---|
739 | 'asmflags'=> '', |
---|
740 | 'cflags' => '', |
---|
741 | }, |
---|
742 | ); |
---|
743 | |
---|
744 | |
---|
745 | my $builddir = $ENV{builddir}; |
---|
746 | $builddir = "." if (! defined $builddir); |
---|
747 | |
---|
748 | my $top_builddir = "${builddir}/.."; |
---|
749 | |
---|
750 | |
---|
751 | open(MAKEFILE, "<${builddir}/Makefile") |
---|
752 | or die "Cannot open Makefile: $!\nIs this a tune build directory?"; |
---|
753 | my ($srcdir, $top_srcdir); |
---|
754 | while (<MAKEFILE>) { |
---|
755 | if (/^srcdir = (.*)/) { $srcdir = $1; } |
---|
756 | if (/^top_srcdir = (.*)/) { $top_srcdir = $1; } |
---|
757 | } |
---|
758 | die "Cannot find \$srcdir in Makefile\n" if (! defined $srcdir); |
---|
759 | die "Cannot find \$top_srcdir in Makefile\n" if (! defined $top_srcdir); |
---|
760 | print "srcdir $srcdir\n" if $opt{'t'}; |
---|
761 | print "top_srcdir $top_srcdir\n" if $opt{'t'}; |
---|
762 | close(MAKEFILE); |
---|
763 | |
---|
764 | |
---|
765 | open(SPEED, ">speed-many.c") or die; |
---|
766 | print SPEED |
---|
767 | "/* speed-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */ |
---|
768 | |
---|
769 | "; |
---|
770 | my $SPEED_EXTRA_ROUTINES = "#define SPEED_EXTRA_ROUTINES \\\n"; |
---|
771 | my $SPEED_EXTRA_PROTOS = "#define SPEED_EXTRA_PROTOS \\\n"; |
---|
772 | my $SPEED_CODE = ""; |
---|
773 | |
---|
774 | open(TRY, ">try-many.c") or die; |
---|
775 | print TRY |
---|
776 | "/* try-many.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */\n" . |
---|
777 | "\n"; |
---|
778 | my $TRY_EXTRA_ROUTINES = "#define EXTRA_ROUTINES \\\n"; |
---|
779 | my $TRY_EXTRA_PROTOS = "#define EXTRA_PROTOS \\\n"; |
---|
780 | |
---|
781 | open(FD,"<${top_builddir}/libtool") or die "Cannot open \"${top_builddir}/libtool\": $!\n"; |
---|
782 | my $pic_flag; |
---|
783 | while (<FD>) { |
---|
784 | if (/^pic_flag="?([^"]*)"?$/) { |
---|
785 | $pic_flag=$1; |
---|
786 | last; |
---|
787 | } |
---|
788 | } |
---|
789 | close FD; |
---|
790 | if (! defined $pic_flag) { |
---|
791 | die "Cannot find pic_flag in ${top_builddir}/libtool"; |
---|
792 | } |
---|
793 | |
---|
794 | my $CFLAGS_PIC = $pic_flag; |
---|
795 | |
---|
796 | my $ASMFLAGS_PIC = ""; |
---|
797 | foreach (split /[ \t]/, $pic_flag) { |
---|
798 | if (/^-D/) { |
---|
799 | $ASMFLAGS_PIC .= " " . $_; |
---|
800 | } |
---|
801 | } |
---|
802 | |
---|
803 | open(MAKEFILE, ">Makefile.many") or die; |
---|
804 | print MAKEFILE |
---|
805 | "# Makefile.many generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST\n" . |
---|
806 | "\n" . |
---|
807 | "all: speed-many try-many\n" . |
---|
808 | "\n" . |
---|
809 | "#--------- begin included copy of basic Makefile ----------\n" . |
---|
810 | "\n"; |
---|
811 | open(FD,"<${builddir}/Makefile") or die "Cannot open \"${builddir}/Makefile\": $!\n"; |
---|
812 | print MAKEFILE <FD>; |
---|
813 | close FD; |
---|
814 | print MAKEFILE |
---|
815 | "\n" . |
---|
816 | "#--------- end included copy of basic Makefile ----------\n" . |
---|
817 | "\n" . |
---|
818 | "CFLAGS_PIC = $CFLAGS_PIC\n" . |
---|
819 | "ASMFLAGS_PIC = $ASMFLAGS_PIC\n" . |
---|
820 | "\n"; |
---|
821 | |
---|
822 | my $CLEAN=""; |
---|
823 | my $MANY_OBJS=""; |
---|
824 | |
---|
825 | |
---|
826 | sub print_ansi2knr { |
---|
827 | my ($base,$file,$includes) = @_; |
---|
828 | if (! defined $file) { $file = "$base.c"; } |
---|
829 | if (! defined $includes) { $includes = ""; } |
---|
830 | |
---|
831 | print MAKEFILE <<EOF; |
---|
832 | ${base}_.c: $file \$(ANSI2KNR) |
---|
833 | \$(CPP) \$(DEFS) \$(INCLUDES) $includes \$(AM_CPPFLAGS) \$(CPPFLAGS) $file | sed 's/^# \([0-9]\)/#line \\1/' | \$(ANSI2KNR) >${base}_.c |
---|
834 | |
---|
835 | EOF |
---|
836 | } |
---|
837 | |
---|
838 | |
---|
839 | # Spawning a glob is a touch slow when there's lots of files. |
---|
840 | my @files = (); |
---|
841 | foreach my $dir (@DIRECTORIES) { |
---|
842 | print "dir $dir\n" if $opt{'t'}; |
---|
843 | if (-f $dir) { |
---|
844 | push @files,$dir; |
---|
845 | } else { |
---|
846 | if (! opendir DD,$dir) { |
---|
847 | print "Cannot open $dir: $!\n"; |
---|
848 | } else { |
---|
849 | push @files, map {$_="$dir/$_"} grep /\.(c|asm|S|h)$/, readdir DD; |
---|
850 | closedir DD; |
---|
851 | } |
---|
852 | } |
---|
853 | } |
---|
854 | @files = sort @files; |
---|
855 | print "@files ",join(" ",@files),"\n" if $opt{'t'}; |
---|
856 | |
---|
857 | my $count_files = 0; |
---|
858 | my $count_functions = 0; |
---|
859 | my %seen_obj; |
---|
860 | my %seen_file; |
---|
861 | |
---|
862 | foreach my $file_full (@files) { |
---|
863 | if (! -f $file_full) { |
---|
864 | print "Not a file: $file_full\n"; |
---|
865 | next; |
---|
866 | } |
---|
867 | if (defined $seen_file{$file_full}) { |
---|
868 | print "Skipping duplicate file: $file_full\n"; |
---|
869 | next; |
---|
870 | } |
---|
871 | $seen_file{$file_full} = 1; |
---|
872 | |
---|
873 | my ($FILE,$path,$lang) = fileparse($file_full,"\.[a-zA-Z]+"); |
---|
874 | $path =~ s/\/$//; |
---|
875 | print "file $FILE path $path lang $lang\n" if $opt{'t'}; |
---|
876 | |
---|
877 | my @pic_choices; |
---|
878 | if ($lang eq '.asm') { @pic_choices=('no','yes'); } |
---|
879 | elsif ($lang eq '.c') { @pic_choices=('no'); } |
---|
880 | elsif ($lang eq '.S') { @pic_choices=('no','yes'); } |
---|
881 | elsif ($lang eq '.h') { @pic_choices=('no'); } |
---|
882 | else { next }; |
---|
883 | |
---|
884 | my ($t, $file_match); |
---|
885 | foreach my $p (@table) { |
---|
886 | # print " ",$p->{'regexp'},"\n" if $opt{'t'}; |
---|
887 | if ($FILE =~ "^($p->{'regexp'})") { |
---|
888 | $t = $p; |
---|
889 | $file_match = $1; |
---|
890 | $file_match = $2 if defined $2; |
---|
891 | last; |
---|
892 | } |
---|
893 | } |
---|
894 | next if ! defined $t; |
---|
895 | print "match $t->{'regexp'} $FILE ($file_full)\n" if $opt{'t'}; |
---|
896 | |
---|
897 | if (! open FD,"<$file_full") { print "Can't open $file_full: $!\n"; next } |
---|
898 | my @file_contents = <FD>; |
---|
899 | close FD; |
---|
900 | |
---|
901 | my $objs; |
---|
902 | if (defined $t->{'mulfunc'}) { $objs = $t->{'mulfunc'}; } |
---|
903 | else { $objs = [$file_match]; } |
---|
904 | print "objs @$objs\n" if $opt{'t'}; |
---|
905 | |
---|
906 | my $ret = $t->{'ret'}; |
---|
907 | if (! defined $ret && $lang eq '.h') { $ret = ''; } |
---|
908 | if (! defined $ret) { die "$FILE return type not defined\n" }; |
---|
909 | print "ret $ret\n" if $opt{'t'}; |
---|
910 | |
---|
911 | my $mpX = $t->{'mpX'}; |
---|
912 | if (! defined $mpX) { $mpX = ($lang eq '.h' ? '' : 'mpn'); } |
---|
913 | $mpX = "${mpX}_" if $mpX ne ''; |
---|
914 | print "mpX $mpX\n" if $opt{'t'}; |
---|
915 | |
---|
916 | my $carrys; |
---|
917 | if (defined $t->{'carrys'}) { $carrys = $t->{'carrys'}; } |
---|
918 | else { $carrys = ['','c']; } |
---|
919 | print "carrys $carrys @$carrys\n" if $opt{'t'}; |
---|
920 | |
---|
921 | # some restriction functions are implemented, but they're not very useful |
---|
922 | my $restriction=''; |
---|
923 | |
---|
924 | my $suffix; |
---|
925 | if ($FILE =~ ("${file_match}_(.+)")) { |
---|
926 | $suffix = $1; |
---|
927 | } elsif ($path =~ /\/mp[zn]\/(.*)$/) { |
---|
928 | # derive the suffix from the path |
---|
929 | $suffix = $1; |
---|
930 | $suffix =~ s/\//_/g; |
---|
931 | # use last directory name, or if there's 3 or more then the last two |
---|
932 | if ($suffix =~ /([^_]*_)+([^_]+_[^_]+)$/) { |
---|
933 | $suffix = $2; |
---|
934 | } elsif ($suffix =~ /([^_]*_)*([^_]+)$/) { |
---|
935 | $suffix = $2; |
---|
936 | } |
---|
937 | } else { |
---|
938 | die "Can't determine suffix for: $file_full (path $path)\n"; |
---|
939 | } |
---|
940 | print "suffix $suffix\n" if $opt{'t'}; |
---|
941 | |
---|
942 | $count_files++; |
---|
943 | |
---|
944 | foreach my $obj (@{$objs}) { |
---|
945 | print "obj $obj\n" if $opt{'t'}; |
---|
946 | |
---|
947 | my $obj_with_suffix = "${obj}_$suffix"; |
---|
948 | if (defined $seen_obj{$obj_with_suffix}) { |
---|
949 | print "Skipping duplicate object: $obj_with_suffix\n"; |
---|
950 | print " first from: $seen_obj{$obj_with_suffix}\n"; |
---|
951 | print " now from: $file_full\n"; |
---|
952 | next; |
---|
953 | } |
---|
954 | $seen_obj{$obj_with_suffix} = $file_full; |
---|
955 | |
---|
956 | my $funs = $t->{'funs'}; |
---|
957 | $funs = [$obj] if ! defined $funs; |
---|
958 | print "funs @$funs\n" if $opt{'t'}; |
---|
959 | |
---|
960 | if (defined $t->{'pic'}) { @pic_choices = ('no'); } |
---|
961 | |
---|
962 | foreach my $pic (map {$pictable{$_}} @pic_choices) { |
---|
963 | print "pic $pic->{'suffix'}\n" if $opt{'t'}; |
---|
964 | |
---|
965 | my $objbase = "${obj}_$suffix$pic->{'suffix'}"; |
---|
966 | print "objbase $objbase\n" if $opt{'t'}; |
---|
967 | |
---|
968 | if ($path !~ "." && -f "${objbase}.c") { |
---|
969 | die "Already have ${objbase}.c"; |
---|
970 | } |
---|
971 | |
---|
972 | my $tmp_file = "tmp-$objbase.c"; |
---|
973 | |
---|
974 | my $renaming; |
---|
975 | foreach my $fun (@{$funs}) { |
---|
976 | if ($mpX eq 'mpn_' && $lang eq '.c') { |
---|
977 | $renaming .= "\t\t-DHAVE_NATIVE_mpn_$fun=1 \\\n"; |
---|
978 | } |
---|
979 | |
---|
980 | # The carry-in variant is with a "c" appended, unless there's a "_1" |
---|
981 | # somewhere, eg. "modexact_1_odd", in which case that becomes "_1c". |
---|
982 | my $fun_carry = $fun; |
---|
983 | if (! ($fun_carry =~ s/_1/_1c/)) { $fun_carry = "${fun}c"; } |
---|
984 | |
---|
985 | $renaming .= |
---|
986 | "\t\t-D__g$mpX$fun=$mpX${fun}_$suffix$pic->{'suffix'} \\\n" . |
---|
987 | "\t\t-D__g$mpX$fun_carry=$mpX${fun_carry}_$suffix$pic->{'suffix'} \\\n"; |
---|
988 | } |
---|
989 | foreach my $r (@{$t->{'rename'}}) { |
---|
990 | if ($r =~ /^__gmp/) { |
---|
991 | $renaming .= "\\\n" . |
---|
992 | "\t\t-D$r=${r}_$suffix$pic->{'suffix'}"; |
---|
993 | } else { |
---|
994 | $renaming .= "\\\n" . |
---|
995 | "\t\t-D__g$mpX$r=$mpX${r}_$suffix$pic->{'suffix'}"; |
---|
996 | } |
---|
997 | } |
---|
998 | print "renaming $renaming\n" if $opt{'t'}; |
---|
999 | |
---|
1000 | print MAKEFILE "\n"; |
---|
1001 | if ($lang eq '.asm') { |
---|
1002 | print MAKEFILE |
---|
1003 | "$objbase.o: $file_full \$(ASM_HEADERS)\n" . |
---|
1004 | " \$(M4) \$(M4FLAGS) -DOPERATION_$obj $pic->{'asmflags'} \\\n" . |
---|
1005 | "$renaming" . |
---|
1006 | " $file_full >tmp-$objbase.s\n" . |
---|
1007 | " \$(CCAS) \$(COMPILE_FLAGS) $pic->{'cflags'} tmp-$objbase.s -o $objbase.o\n" . |
---|
1008 | " \$(RM_TMP) tmp-$objbase.s\n"; |
---|
1009 | $MANY_OBJS .= " $objbase.o"; |
---|
1010 | |
---|
1011 | } elsif ($lang eq '.c') { |
---|
1012 | print MAKEFILE |
---|
1013 | "$objbase.o: $file_full\n" . |
---|
1014 | " \$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" . |
---|
1015 | "$renaming" . |
---|
1016 | " -c $file_full -o $objbase.o\n"; |
---|
1017 | print_ansi2knr($objbase, |
---|
1018 | $file_full, |
---|
1019 | " -DOPERATION_$obj\\\n$renaming\t\t"); |
---|
1020 | $MANY_OBJS .= " $objbase\$U.o"; |
---|
1021 | |
---|
1022 | } elsif ($lang eq '.S') { |
---|
1023 | print MAKEFILE |
---|
1024 | "$objbase.o: $file_full\n" . |
---|
1025 | " \$(COMPILE) -g $pic->{'asmflags'} \\\n" . |
---|
1026 | "$renaming" . |
---|
1027 | " -c $file_full -o $objbase.o\n"; |
---|
1028 | $MANY_OBJS .= " $objbase.o"; |
---|
1029 | |
---|
1030 | } elsif ($lang eq '.h') { |
---|
1031 | print MAKEFILE |
---|
1032 | "$objbase.o: tmp-$objbase.c $file_full\n" . |
---|
1033 | " \$(COMPILE) -DOPERATION_$obj $pic->{'cflags'} \\\n" . |
---|
1034 | "$renaming" . |
---|
1035 | " -c tmp-$objbase.c -o $objbase.o\n"; |
---|
1036 | print_ansi2knr($objbase, |
---|
1037 | "tmp-$objbase.c", |
---|
1038 | " -DOPERATION_$obj\\\n$renaming\t\t"); |
---|
1039 | $MANY_OBJS .= " $objbase\$U.o"; |
---|
1040 | |
---|
1041 | $CLEAN .= " tmp-$objbase.c"; |
---|
1042 | open(TMP_C,">tmp-$objbase.c") |
---|
1043 | or die "Can't create tmp-$objbase.c: $!\n"; |
---|
1044 | print TMP_C |
---|
1045 | "/* tmp-$objbase.c generated by many.pl - DO NOT EDIT, CHANGES WILL BE LOST */ |
---|
1046 | |
---|
1047 | #include \"gmp.h\" |
---|
1048 | #include \"gmp-impl.h\" |
---|
1049 | #include \"longlong.h\" |
---|
1050 | #include \"speed.h\" |
---|
1051 | |
---|
1052 | "; |
---|
1053 | } |
---|
1054 | |
---|
1055 | my $tests_program = "$top_srcdir/tests/devel/$obj.c"; |
---|
1056 | if (-f $tests_program) { |
---|
1057 | $tests_program = "\$(top_srcdir)/tests/devel/$obj.c"; |
---|
1058 | print_ansi2knr("tests_${objbase}", |
---|
1059 | $tests_program, |
---|
1060 | "\\\n$renaming\t\t\$(CFLAGS_TESTS_SP)"); |
---|
1061 | print_ansi2knr("tests_${objbase}_sp", |
---|
1062 | $tests_program, |
---|
1063 | "\\\n$renaming\t\t\$(CFLAGS_TESTS_SP)"); |
---|
1064 | |
---|
1065 | print MAKEFILE <<EOF; |
---|
1066 | tests_$objbase.o: $tests_program |
---|
1067 | \$(COMPILE) \$(CFLAGS_TESTS) \\ |
---|
1068 | $renaming -c $tests_program -o tests_$objbase.o |
---|
1069 | |
---|
1070 | tests_$objbase: $objbase\$U.o tests_$objbase\$U.o ../libgmp.la |
---|
1071 | \$(LINK) tests_$objbase\$U.o $objbase\$U.o ../libgmp.la -o tests_$objbase |
---|
1072 | |
---|
1073 | tests_${objbase}_sp.o: $tests_program |
---|
1074 | \$(COMPILE) \$(CFLAGS_TESTS_SP) \\ |
---|
1075 | $renaming -c $tests_program -o tests_${objbase}_sp.o |
---|
1076 | |
---|
1077 | tests_${objbase}_sp: $objbase\$U.o tests_${objbase}_sp\$U.o ../libgmp.la |
---|
1078 | \$(LINK) tests_${objbase}_sp\$U.o $objbase\$U.o ../libgmp.la -o tests_${objbase}_sp |
---|
1079 | |
---|
1080 | EOF |
---|
1081 | $CLEAN .= " tests_$objbase tests_${objbase}_sp"; |
---|
1082 | } |
---|
1083 | |
---|
1084 | foreach my $fun (@{$funs}) { |
---|
1085 | print "fun $fun\n" if $opt{'t'}; |
---|
1086 | |
---|
1087 | if ($lang eq '.h') { |
---|
1088 | my $macro_before = $t->{'macro_before'}; |
---|
1089 | $macro_before = "" if ! defined $macro_before; |
---|
1090 | print TMP_C |
---|
1091 | "$macro_before |
---|
1092 | #undef $fun |
---|
1093 | #include \"$file_full\" |
---|
1094 | |
---|
1095 | "; |
---|
1096 | } |
---|
1097 | |
---|
1098 | my $args = $t->{"args_$fun"}; |
---|
1099 | if (! defined $args) { $args = $t->{'args'}; } |
---|
1100 | if (! defined $args) { die "Need args for $fun\n"; } |
---|
1101 | print "args $args\n" if $opt{'t'}; |
---|
1102 | |
---|
1103 | foreach my $carry (@$carrys) { |
---|
1104 | print "carry $carry\n" if $opt{'t'}; |
---|
1105 | |
---|
1106 | my $fun_carry = $fun; |
---|
1107 | if (! ($fun_carry =~ s/_1/_1$carry/)) { $fun_carry = "$fun$carry"; } |
---|
1108 | print "fun_carry $fun_carry\n" if $opt{'t'}; |
---|
1109 | |
---|
1110 | if ($lang =~ /\.(asm|S)/ |
---|
1111 | && ! grep(m"PROLOGUE\((.* )?$mpX$fun_carry[ )]",@file_contents)) { |
---|
1112 | print "no PROLOGUE $mpX$fun_carry\n" if $opt{'t'}; |
---|
1113 | next; |
---|
1114 | } |
---|
1115 | if ($lang eq '.c' |
---|
1116 | && ! grep(m"^(#define FUNCTION\s+)?$mpX$fun_carry\W", @file_contents)) { |
---|
1117 | print "no mention of $mpX$fun_carry\n" if $opt{'t'}; |
---|
1118 | next; |
---|
1119 | } |
---|
1120 | if ($lang eq '.h' |
---|
1121 | && ! grep(m"^#define $fun_carry\W", @file_contents)) { |
---|
1122 | print "no mention of #define $fun_carry\n" if $opt{'t'}; |
---|
1123 | next; |
---|
1124 | } |
---|
1125 | |
---|
1126 | $count_functions++; |
---|
1127 | |
---|
1128 | my $carryarg; |
---|
1129 | if (defined $t->{'carryarg'}) { $carryarg = $t->{'carryarg'}; } |
---|
1130 | if ($carry eq '') { $carryarg = ''; } |
---|
1131 | else { $carryarg = ', mp_limb_t carry'; } |
---|
1132 | print "carryarg $carryarg\n" if $opt{'t'}; |
---|
1133 | |
---|
1134 | my $funfull="$mpX${fun_carry}_$suffix$pic->{'suffix'}"; |
---|
1135 | print "funfull $funfull\n" if $opt{'t'}; |
---|
1136 | |
---|
1137 | if ($lang ne '.h') { |
---|
1138 | my $attrib = $t->{'attrib'}; |
---|
1139 | if (defined $attrib) { $attrib = " $attrib"; } |
---|
1140 | else { $attrib = ''; } |
---|
1141 | |
---|
1142 | my $proto = "$t->{'ret'} $funfull _PROTO (($args$carryarg))$attrib; \\\n"; |
---|
1143 | $SPEED_EXTRA_PROTOS .= $proto; |
---|
1144 | $TRY_EXTRA_PROTOS .= $proto; |
---|
1145 | } |
---|
1146 | |
---|
1147 | my $try_type = $t->{"try-$fun"}; |
---|
1148 | $try_type = $t->{'try'} if ! defined $try_type; |
---|
1149 | if (! defined $try_type) { |
---|
1150 | if ($mpX eq 'mpn_') { |
---|
1151 | $try_type = "TYPE_\U$fun_carry"; |
---|
1152 | } else { |
---|
1153 | $try_type = "TYPE_\U$mpX\U$fun_carry"; |
---|
1154 | } |
---|
1155 | } |
---|
1156 | print "try_type $try_type\n" if $opt{'t'}; |
---|
1157 | |
---|
1158 | my $try_minsize = $t->{'try-minsize'}; |
---|
1159 | if (defined $try_minsize) { |
---|
1160 | $try_minsize = ", " . $try_minsize; |
---|
1161 | } else { |
---|
1162 | $try_minsize = ""; |
---|
1163 | } |
---|
1164 | print "try_minsize $try_minsize\n" if $opt{'t'}; |
---|
1165 | |
---|
1166 | if ($try_type ne 'none') { |
---|
1167 | $TRY_EXTRA_ROUTINES .= |
---|
1168 | " { TRY($mpX${fun_carry}_$suffix$pic->{'suffix'}), $try_type$try_minsize }, \\\n"; |
---|
1169 | } |
---|
1170 | |
---|
1171 | my $speed_flags = $t->{'speed_flags'}; |
---|
1172 | $speed_flags = '0' if ! defined $speed_flags; |
---|
1173 | print "speed_flags $speed_flags\n" if $opt{'t'}; |
---|
1174 | |
---|
1175 | my $speed_routine = $t->{'speed'}; |
---|
1176 | $speed_routine = "SPEED_ROUTINE_\U$mpX\U$fun" |
---|
1177 | if !defined $speed_routine; |
---|
1178 | if (! ($speed_routine =~ s/_1/_1\U$carry/)) { |
---|
1179 | $speed_routine = "$speed_routine\U$carry"; |
---|
1180 | } |
---|
1181 | print "speed_routine $speed_routine\n" if $opt{'t'}; |
---|
1182 | |
---|
1183 | my @speed_suffixes = (); |
---|
1184 | push (@speed_suffixes, '') if $speed_routine ne 'none'; |
---|
1185 | push (@speed_suffixes, @{$t->{'speed_suffixes'}}) |
---|
1186 | if defined $t->{'speed_suffixes'}; |
---|
1187 | |
---|
1188 | my $macro_speed = $t->{'macro-speed'}; |
---|
1189 | $macro_speed = "$speed_routine ($fun_carry)" if ! defined $macro_speed; |
---|
1190 | $macro_speed =~ s/\$fun/$fun_carry/g; |
---|
1191 | |
---|
1192 | foreach my $S (@speed_suffixes) { |
---|
1193 | my $Sfunfull="$mpX${fun_carry}${S}_$suffix$pic->{'suffix'}"; |
---|
1194 | |
---|
1195 | $SPEED_EXTRA_PROTOS .= |
---|
1196 | "double speed_$Sfunfull _PROTO ((struct speed_params *s)); \\\n"; |
---|
1197 | $SPEED_EXTRA_ROUTINES .= |
---|
1198 | " { \"$Sfunfull\", speed_$Sfunfull, $speed_flags }, \\\n"; |
---|
1199 | if ($lang eq '.h') { |
---|
1200 | print TMP_C |
---|
1201 | "double |
---|
1202 | speed_$Sfunfull (struct speed_params *s) |
---|
1203 | { |
---|
1204 | $macro_speed |
---|
1205 | } |
---|
1206 | |
---|
1207 | "; |
---|
1208 | } else { |
---|
1209 | $SPEED_CODE .= |
---|
1210 | "double\n" . |
---|
1211 | "speed_$Sfunfull (struct speed_params *s)\n" . |
---|
1212 | "{\n" . |
---|
1213 | "$restriction" . |
---|
1214 | " $speed_routine\U$S\E ($funfull)\n" . |
---|
1215 | "}\n"; |
---|
1216 | } |
---|
1217 | } |
---|
1218 | } |
---|
1219 | } |
---|
1220 | } |
---|
1221 | } |
---|
1222 | } |
---|
1223 | |
---|
1224 | |
---|
1225 | print SPEED $SPEED_EXTRA_PROTOS . "\n"; |
---|
1226 | print SPEED $SPEED_EXTRA_ROUTINES . "\n"; |
---|
1227 | if (defined $ENV{speedinc}) { print SPEED $ENV{speedinc} . "\n"; } |
---|
1228 | print SPEED |
---|
1229 | "#include \"speed.c\"\n" . |
---|
1230 | "\n"; |
---|
1231 | print SPEED $SPEED_CODE; |
---|
1232 | |
---|
1233 | print TRY $TRY_EXTRA_ROUTINES . "\n"; |
---|
1234 | print TRY $TRY_EXTRA_PROTOS . "\n"; |
---|
1235 | my $tryinc = ""; |
---|
1236 | if (defined $ENV{tryinc}) { |
---|
1237 | $tryinc = $ENV{tryinc}; |
---|
1238 | print TRY "#include \"$tryinc\"\n"; |
---|
1239 | } |
---|
1240 | print "tryinc $tryinc\n" if $opt{'t'}; |
---|
1241 | print TRY |
---|
1242 | "#include \"try.c\"\n" . |
---|
1243 | "\n"; |
---|
1244 | |
---|
1245 | my $extra_libraries = ""; |
---|
1246 | if (defined $ENV{extra_libraries}) { $extra_libraries = $ENV{extra_libraries};} |
---|
1247 | |
---|
1248 | my $trydeps = ""; |
---|
1249 | if (defined $ENV{trydeps}) { $trydeps = $ENV{trydeps}; } |
---|
1250 | $trydeps .= " $tryinc"; |
---|
1251 | print "trydeps $trydeps\n" if $opt{'t'}; |
---|
1252 | |
---|
1253 | print MAKEFILE <<EOF; |
---|
1254 | |
---|
1255 | MANY_OBJS = $MANY_OBJS |
---|
1256 | MANY_CLEAN = \$(MANY_OBJS) \\ |
---|
1257 | speed-many.c speed-many\$U.o speed-many\$(EXEEXT) \\ |
---|
1258 | try-many.c try-many\$U.o try-many \\ |
---|
1259 | $CLEAN |
---|
1260 | MANY_DISTCLEAN = Makefile.many |
---|
1261 | |
---|
1262 | speed-many: \$(MANY_OBJS) speed-many\$U.o libspeed.la $extra_libraries |
---|
1263 | \$(LINK) \$(LDFLAGS) speed-many\$U.o \$(MANY_OBJS) \$(LDADD) \$(LIBS) $extra_libraries |
---|
1264 | |
---|
1265 | try-many: \$(MANY_OBJS) try-many\$U.o libspeed.la $extra_libraries |
---|
1266 | \$(LINK) \$(LDFLAGS) try-many\$U.o \$(MANY_OBJS) \$(LDADD) \$(LIBS) $extra_libraries |
---|
1267 | |
---|
1268 | try-many.o: try-many.c \$(top_srcdir)/tests/devel/try.c $trydeps |
---|
1269 | \$(COMPILE) -I\$(top_srcdir)/tests/devel -c try-many.c |
---|
1270 | |
---|
1271 | EOF |
---|
1272 | |
---|
1273 | print_ansi2knr("speed-many"); |
---|
1274 | print_ansi2knr("try-many", |
---|
1275 | "\$(top_srcdir)/tests/devel/try.c", |
---|
1276 | "-I\$(top_srcdir)/tests/devel"); |
---|
1277 | |
---|
1278 | print MAKEFILE <<EOF; |
---|
1279 | RM_TMP = rm -f |
---|
1280 | CFLAGS_TESTS = -DSIZE=50 -DTIMES=1 -DRANDOM -DCLOCK=333000000 |
---|
1281 | CFLAGS_TESTS_SP = -DSIZE=1024 -DNOCHECK -DOPS=200000000 -DCLOCK=333000000 |
---|
1282 | EOF |
---|
1283 | |
---|
1284 | |
---|
1285 | print "Total $count_files files, $count_functions functions\n"; |
---|
1286 | |
---|
1287 | |
---|
1288 | |
---|
1289 | # Local variables: |
---|
1290 | # perl-indent-level: 2 |
---|
1291 | # End: |
---|