1 | /************************************************* |
---|
2 | * PCRE testing program * |
---|
3 | *************************************************/ |
---|
4 | |
---|
5 | #include <ctype.h> |
---|
6 | #include <stdio.h> |
---|
7 | #include <string.h> |
---|
8 | #include <stdlib.h> |
---|
9 | #include <time.h> |
---|
10 | #include <locale.h> |
---|
11 | |
---|
12 | /* Use the internal info for displaying the results of pcre_study(). */ |
---|
13 | |
---|
14 | #include "internal.h" |
---|
15 | |
---|
16 | /* It is possible to compile this test program without including support for |
---|
17 | testing the POSIX interface, though this is not available via the standard |
---|
18 | Makefile. */ |
---|
19 | |
---|
20 | #if !defined NOPOSIX |
---|
21 | #include "pcreposix.h" |
---|
22 | #endif |
---|
23 | |
---|
24 | #ifndef CLOCKS_PER_SEC |
---|
25 | #ifdef CLK_TCK |
---|
26 | #define CLOCKS_PER_SEC CLK_TCK |
---|
27 | #else |
---|
28 | #define CLOCKS_PER_SEC 100 |
---|
29 | #endif |
---|
30 | #endif |
---|
31 | |
---|
32 | #define LOOPREPEAT 20000 |
---|
33 | |
---|
34 | |
---|
35 | static FILE *outfile; |
---|
36 | static int log_store = 0; |
---|
37 | static size_t gotten_store; |
---|
38 | |
---|
39 | |
---|
40 | |
---|
/* Lookup tables shared by the UTF-8 conversion functions. Each has one entry
per encoding length, indexed by the number of bytes that follow the first. */

/* Largest character value that fits in an encoding of each length. */

static int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

/* Marker bits that are ORed into the first byte of an encoding. */

static int utf8_table2[] = {
  0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* Mask that selects the data bits held in the first byte of an encoding. */

static int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* Encode an integer character value as a UTF-8 byte sequence. Values up to
0x7fffffff are accepted, giving sequences of between one and six bytes.

Arguments:
  cvalue     the character value to encode
  buffer     where to put the bytes; must have room for at least 6

Returns:     the number of bytes written to buffer (1 to 6)
             -1 if cvalue is negative (nothing is written)
              0 if cvalue is larger than the last entry of utf8_table1
                (nothing is written; only possible when int is wider
                than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
const int lengths = (int)(sizeof(utf8_table1)/sizeof(utf8_table1[0]));
int extra = 0;                /* number of bytes after the first */
unsigned char *out;

/* Find the shortest encoding whose range covers the value. A negative value
stops at the first entry, so it can never be reported as "too big". */

while (extra < lengths && cvalue > utf8_table1[extra]) extra++;

if (extra == lengths) return 0;
if (cvalue < 0) return -1;

/* Fill in the trailing bytes from last to first, six bits at a time. */

for (out = buffer + extra; out > buffer; out--)
  {
  *out = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }

/* Whatever is left of the value shares the first byte with the marker. */

*buffer = (unsigned char)(utf8_table2[extra] | cvalue);
return extra + 1;
}
---|
86 | |
---|
87 | |
---|
88 | /************************************************* |
---|
89 | * Convert UTF-8 string to value * |
---|
90 | *************************************************/ |
---|
91 | |
---|
92 | /* This function takes one or more bytes that represents a UTF-8 character, |
---|
93 | and returns the value of the character. |
---|
94 | |
---|
95 | Argument: |
---|
96 | buffer a pointer to the byte vector |
---|
97 | vptr a pointer to an int to receive the value |
---|
98 | |
---|
99 | Returns: > 0 => the number of bytes consumed |
---|
100 | -6 to 0 => malformed UTF-8 character at offset = (-return) |
---|
101 | */ |
---|
102 | |
---|
103 | int |
---|
104 | utf82ord(unsigned char *buffer, int *vptr) |
---|
105 | { |
---|
106 | int c = *buffer++; |
---|
107 | int d = c; |
---|
108 | int i, j, s; |
---|
109 | |
---|
110 | for (i = -1; i < 6; i++) /* i is number of additional bytes */ |
---|
111 | { |
---|
112 | if ((d & 0x80) == 0) break; |
---|
113 | d <<= 1; |
---|
114 | } |
---|
115 | |
---|
116 | if (i == -1) { *vptr = c; return 1; } /* ascii character */ |
---|
117 | if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ |
---|
118 | |
---|
119 | /* i now has a value in the range 1-5 */ |
---|
120 | |
---|
121 | s = 6*i; |
---|
122 | d = (c & utf8_table3[i]) << s; |
---|
123 | |
---|
124 | for (j = 0; j < i; j++) |
---|
125 | { |
---|
126 | c = *buffer++; |
---|
127 | if ((c & 0xc0) != 0x80) return -(j+1); |
---|
128 | s -= 6; |
---|
129 | d |= (c & 0x3f) << s; |
---|
130 | } |
---|
131 | |
---|
132 | /* Check that encoding was the correct unique one */ |
---|
133 | |
---|
134 | for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++) |
---|
135 | if (d <= utf8_table1[j]) break; |
---|
136 | if (j != i) return -(i+1); |
---|
137 | |
---|
138 | /* Valid value */ |
---|
139 | |
---|
140 | *vptr = d; |
---|
141 | return i+1; |
---|
142 | } |
---|
143 | |
---|
144 | |
---|
145 | |
---|
146 | |
---|
147 | |
---|
148 | |
---|
149 | /* Debugging function to print the internal form of the regex. This is the same |
---|
150 | code as contained in pcre.c under the DEBUG macro. */ |
---|
151 | |
---|
/* Text used by print_internals() when it names an item of the compiled
pattern. The table is indexed directly by opcode byte, so the order of the
entries has to follow the opcode numbering, which is defined outside this
file (presumably in internal.h - confirm before reordering anything). */

static const char *OP_names[] = {
  "End",
  "\\A", "\\B", "\\b",
  "\\D", "\\d", "\\S", "\\s", "\\W", "\\w",
  "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",

  /* Four runs of repeat names; the last run has one "{" fewer. */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Branumber", "Bra"
};
---|
165 | |
---|
166 | |
---|
/* Write a listing of a compiled pattern to outfile. The listing is enclosed
between two lines of dashes and has one line per item. Every line begins with
the item's byte offset from the start of the compiled code. The walk ends, and
the function returns, when an OP_END item is reached.

Within the loop, each case leaves code pointing at the LAST byte of the item
it has just printed; the code++ at the bottom of the loop then moves on to the
first byte of the next item.

Argument:
  re        the compiled pattern; cast to real_pcre to reach its code field

Returns:    nothing
*/

static void print_internals(pcre *re)
{
unsigned char *code = ((real_pcre *)re)->code;

fprintf(outfile, "------------------------------------------------------------------\n");

for(;;)
  {
  int c;
  int charlength;

  /* Offset of this item from the start of the compiled code */

  fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));

  /* Every opcode value from OP_BRA upwards is handled here. The two bytes
  after the opcode are printed as a 16-bit value, high byte first. The amount
  by which the opcode exceeds OP_BRA is printed as a number unless it is
  greater than EXTRACT_BASIC_MAX, in which case "extra" is shown instead. */

  if (*code >= OP_BRA)
    {
    if (*code - OP_BRA > EXTRACT_BASIC_MAX)
      fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
    else
      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
    code += 2;
    }

  else switch(*code)
    {
    /* End of the compiled code: print the closing line and stop */

    case OP_END:
    fprintf(outfile, " %s\n", OP_names[*code]);
    fprintf(outfile, "------------------------------------------------------------------\n");
    return;

    /* One data byte follows the opcode; it is shown in hex */

    case OP_OPT:
    fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
    code++;
    break;

    /* A count byte follows the opcode, then that many character bytes.
    Non-printing characters are shown as \x escapes. */

    case OP_CHARS:
    charlength = *(++code);
    fprintf(outfile, "%3d ", charlength);
    while (charlength-- > 0)
      if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
        else fprintf(outfile, "\\x%02x", c);
    break;

    /* Items that consist of an opcode and a 16-bit value (high byte first) */

    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_ALT:
    case OP_KET:
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_COND:
    case OP_BRANUMBER:
    case OP_REVERSE:
    case OP_CREF:
    fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
    code += 2;
    break;

    /* Repeats with a single operand byte. For opcodes from OP_TYPESTAR
    upwards the operand is itself used as an index into OP_names; otherwise
    it is printed as a character. The post-increment in the last fprintf
    steps over the opcode, leaving code on the operand byte. */

    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    if (*code >= OP_TYPESTAR)
      fprintf(outfile, " %s", OP_names[code[1]]);
    else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
      else fprintf(outfile, " \\x%02x", c);
    fprintf(outfile, "%s", OP_names[*code++]);
    break;

    /* Counted repeats of a character: a 16-bit count in code[1] and code[2],
    then the character in code[3] */

    case OP_EXACT:
    case OP_UPTO:
    case OP_MINUPTO:
    if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
      else fprintf(outfile, " \\x%02x{", c);
    if (*code != OP_EXACT) fprintf(outfile, ",");
    fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
    if (*code == OP_MINUPTO) fprintf(outfile, "?");
    code += 3;
    break;

    /* The same layout, but code[3] is used as an index into OP_names. Note
    that the "up to" forms print "0," here where the character forms above
    print only ",". */

    case OP_TYPEEXACT:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    fprintf(outfile, " %s{", OP_names[code[3]]);
    if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
    fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
    if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
    code += 3;
    break;

    /* One character byte follows the opcode; shown as [^c] */

    case OP_NOT:
    if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
      else fprintf(outfile, " [^\\x%02x]", c);
    break;

    /* Repeats of [^c], laid out like the single-operand repeats above */

    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
      else fprintf(outfile, " [^\\x%02x]", c);
    fprintf(outfile, "%s", OP_names[*code++]);
    break;

    /* Counted repeats of [^c], laid out like the counted repeats above */

    case OP_NOTEXACT:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
      else fprintf(outfile, " [^\\x%02x]{", c);
    if (*code != OP_NOTEXACT) fprintf(outfile, ",");
    fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
    if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
    code += 3;
    break;

    /* A 16-bit number follows the opcode. After stepping over the whole
    item, control jumps into the OP_CLASS block below so that the code which
    prints a following repeat item is shared. */

    case OP_REF:
    fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
    code += 3;
    goto CLASS_REF_REPEAT;

    /* The opcode is followed by a 32-byte bit map covering the values 0 to
    255; value i is present when bit (i&7) of byte i/8 is set. Consecutive
    values that are present are printed as a range. */

    case OP_CLASS:
      {
      int i, min, max;
      code++;
      fprintf(outfile, " [");

      for (i = 0; i < 256; i++)
        {
        if ((code[i/8] & (1 << (i&7))) != 0)
          {
          int j;

          /* Find the first value after i that is not present */

          for (j = i+1; j < 256; j++)
            if ((code[j/8] & (1 << (j&7))) == 0) break;
          if (i == '-' || i == ']') fprintf(outfile, "\\");
          if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);

          /* j becomes the last value of the run; print it only if the run
          holds more than one value */

          if (--j > i)
            {
            fprintf(outfile, "-");
            if (j == '-' || j == ']') fprintf(outfile, "\\");
            if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
            }
          i = j;
          }
        }
      fprintf(outfile, "]");
      code += 32;

      /* Here code points at the item after the class or reference. If that
      item is one of the repeat opcodes listed below, it is printed on the
      same line. */

      CLASS_REF_REPEAT:

      switch(*code)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        fprintf(outfile, "%s", OP_names[*code]);
        break;

        /* Two 16-bit values follow: the minimum, then the maximum. A
        maximum of zero is printed as an open-ended range. */

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        min = (code[1] << 8) + code[2];
        max = (code[3] << 8) + code[4];
        if (max == 0) fprintf(outfile, "{%d,}", min);
        else fprintf(outfile, "{%d,%d}", min, max);
        if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
        code += 4;
        break;

        /* Not a repeat: step back so that the code++ at the bottom of the
        loop leaves code on this item, which is then printed on a line of
        its own. */

        default:
        code--;
        }
      }
    break;

    /* Anything else is just a one-node item */

    default:
    fprintf(outfile, " %s", OP_names[*code]);
    break;
    }

  /* Move from the last byte of this item to the start of the next */

  code++;
  fprintf(outfile, "\n");
  }
}
---|
365 | |
---|
366 | |
---|
367 | |
---|
368 | /* Character string printing function. A "normal" and a UTF-8 version. */ |
---|
369 | |
---|
370 | static void pchars(unsigned char *p, int length, int utf8) |
---|
371 | { |
---|
372 | int c; |
---|
373 | while (length-- > 0) |
---|
374 | { |
---|
375 | if (utf8) |
---|
376 | { |
---|
377 | int rc = utf82ord(p, &c); |
---|
378 | if (rc > 0) |
---|
379 | { |
---|
380 | length -= rc - 1; |
---|
381 | p += rc; |
---|
382 | if (c < 256 && isprint(c)) fprintf(outfile, "%c", c); |
---|
383 | else fprintf(outfile, "\\x{%02x}", c); |
---|
384 | continue; |
---|
385 | } |
---|
386 | } |
---|
387 | |
---|
388 | /* Not UTF-8, or malformed UTF-8 */ |
---|
389 | |
---|
390 | if (isprint(c = *(p++))) fprintf(outfile, "%c", c); |
---|
391 | else fprintf(outfile, "\\x%02x", c); |
---|
392 | } |
---|
393 | } |
---|
394 | |
---|
395 | |
---|
396 | |
---|
397 | /* Alternative malloc function, to test functionality and show the size of the |
---|
398 | compiled re. */ |
---|
399 | |
---|
400 | static void *new_malloc(size_t size) |
---|
401 | { |
---|
402 | gotten_store = size; |
---|
403 | if (log_store) |
---|
404 | fprintf(outfile, "Memory allocation (code space): %d\n", |
---|
405 | (int)((int)size - offsetof(real_pcre, code[0]))); |
---|
406 | return malloc(size); |
---|
407 | } |
---|
408 | |
---|
409 | |
---|
410 | |
---|
411 | |
---|
412 | /* Get one piece of information from the pcre_fullinfo() function */ |
---|
413 | |
---|
414 | static void new_info(pcre *re, pcre_extra *study, int option, void *ptr) |
---|
415 | { |
---|
416 | int rc; |
---|
417 | if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0) |
---|
418 | fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option); |
---|
419 | } |
---|
420 | |
---|
421 | |
---|
422 | |
---|
423 | |
---|
424 | /* Read lines from named file or stdin and write to named file or stdout; lines |
---|
425 | consist of a regular expression, in delimiters and optionally followed by |
---|
426 | options, followed by a set of test data, terminated by an empty line. */ |
---|
427 | |
---|
428 | int main(int argc, char **argv) |
---|
429 | { |
---|
430 | FILE *infile = stdin; |
---|
431 | int options = 0; |
---|
432 | int study_options = 0; |
---|
433 | int op = 1; |
---|
434 | int timeit = 0; |
---|
435 | int showinfo = 0; |
---|
436 | int showstore = 0; |
---|
437 | int size_offsets = 45; |
---|
438 | int size_offsets_max; |
---|
439 | int *offsets; |
---|
440 | #if !defined NOPOSIX |
---|
441 | int posix = 0; |
---|
442 | #endif |
---|
443 | int debug = 0; |
---|
444 | int done = 0; |
---|
445 | unsigned char buffer[30000]; |
---|
446 | unsigned char dbuffer[1024]; |
---|
447 | |
---|
448 | /* Static so that new_malloc can use it. */ |
---|
449 | |
---|
450 | outfile = stdout; |
---|
451 | |
---|
452 | /* Scan options */ |
---|
453 | |
---|
454 | while (argc > 1 && argv[op][0] == '-') |
---|
455 | { |
---|
456 | char *endptr; |
---|
457 | |
---|
458 | if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0) |
---|
459 | showstore = 1; |
---|
460 | else if (strcmp(argv[op], "-t") == 0) timeit = 1; |
---|
461 | else if (strcmp(argv[op], "-i") == 0) showinfo = 1; |
---|
462 | else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; |
---|
463 | else if (strcmp(argv[op], "-o") == 0 && argc > 2 && |
---|
464 | ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) |
---|
465 | { |
---|
466 | op++; |
---|
467 | argc--; |
---|
468 | } |
---|
469 | #if !defined NOPOSIX |
---|
470 | else if (strcmp(argv[op], "-p") == 0) posix = 1; |
---|
471 | #endif |
---|
472 | else |
---|
473 | { |
---|
474 | printf("** Unknown or malformed option %s\n", argv[op]); |
---|
475 | printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n"); |
---|
476 | printf(" -d debug: show compiled code; implies -i\n" |
---|
477 | " -i show information about compiled pattern\n" |
---|
478 | " -o <n> set size of offsets vector to <n>\n"); |
---|
479 | #if !defined NOPOSIX |
---|
480 | printf(" -p use POSIX interface\n"); |
---|
481 | #endif |
---|
482 | printf(" -s output store information\n" |
---|
483 | " -t time compilation and execution\n"); |
---|
484 | return 1; |
---|
485 | } |
---|
486 | op++; |
---|
487 | argc--; |
---|
488 | } |
---|
489 | |
---|
490 | /* Get the store for the offsets vector, and remember what it was */ |
---|
491 | |
---|
492 | size_offsets_max = size_offsets; |
---|
493 | offsets = malloc(size_offsets_max * sizeof(int)); |
---|
494 | if (offsets == NULL) |
---|
495 | { |
---|
496 | printf("** Failed to get %d bytes of memory for offsets vector\n", |
---|
497 | size_offsets_max * sizeof(int)); |
---|
498 | return 1; |
---|
499 | } |
---|
500 | |
---|
501 | /* Sort out the input and output files */ |
---|
502 | |
---|
503 | if (argc > 1) |
---|
504 | { |
---|
505 | infile = fopen(argv[op], "r"); |
---|
506 | if (infile == NULL) |
---|
507 | { |
---|
508 | printf("** Failed to open %s\n", argv[op]); |
---|
509 | return 1; |
---|
510 | } |
---|
511 | } |
---|
512 | |
---|
513 | if (argc > 2) |
---|
514 | { |
---|
515 | outfile = fopen(argv[op+1], "w"); |
---|
516 | if (outfile == NULL) |
---|
517 | { |
---|
518 | printf("** Failed to open %s\n", argv[op+1]); |
---|
519 | return 1; |
---|
520 | } |
---|
521 | } |
---|
522 | |
---|
523 | /* Set alternative malloc function */ |
---|
524 | |
---|
525 | pcre_malloc = new_malloc; |
---|
526 | |
---|
527 | /* Heading line, then prompt for first regex if stdin */ |
---|
528 | |
---|
529 | fprintf(outfile, "PCRE version %s\n\n", pcre_version()); |
---|
530 | |
---|
531 | /* Main loop */ |
---|
532 | |
---|
533 | while (!done) |
---|
534 | { |
---|
535 | pcre *re = NULL; |
---|
536 | pcre_extra *extra = NULL; |
---|
537 | |
---|
538 | #if !defined NOPOSIX /* There are still compilers that require no indent */ |
---|
539 | regex_t preg; |
---|
540 | int do_posix = 0; |
---|
541 | #endif |
---|
542 | |
---|
543 | const char *error; |
---|
544 | unsigned char *p, *pp, *ppp; |
---|
545 | const unsigned char *tables = NULL; |
---|
546 | int do_study = 0; |
---|
547 | int do_debug = debug; |
---|
548 | int do_G = 0; |
---|
549 | int do_g = 0; |
---|
550 | int do_showinfo = showinfo; |
---|
551 | int do_showrest = 0; |
---|
552 | int utf8 = 0; |
---|
553 | int erroroffset, len, delimiter; |
---|
554 | |
---|
555 | if (infile == stdin) printf(" re> "); |
---|
556 | if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break; |
---|
557 | if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); |
---|
558 | |
---|
559 | p = buffer; |
---|
560 | while (isspace(*p)) p++; |
---|
561 | if (*p == 0) continue; |
---|
562 | |
---|
563 | /* Get the delimiter and seek the end of the pattern; if it isn't |
---|
564 | complete, read more. */ |
---|
565 | |
---|
566 | delimiter = *p++; |
---|
567 | |
---|
568 | if (isalnum(delimiter) || delimiter == '\\') |
---|
569 | { |
---|
570 | fprintf(outfile, "** Delimiter must not be alphameric or \\\n"); |
---|
571 | goto SKIP_DATA; |
---|
572 | } |
---|
573 | |
---|
574 | pp = p; |
---|
575 | |
---|
576 | for(;;) |
---|
577 | { |
---|
578 | while (*pp != 0) |
---|
579 | { |
---|
580 | if (*pp == '\\' && pp[1] != 0) pp++; |
---|
581 | else if (*pp == delimiter) break; |
---|
582 | pp++; |
---|
583 | } |
---|
584 | if (*pp != 0) break; |
---|
585 | |
---|
586 | len = sizeof(buffer) - (pp - buffer); |
---|
587 | if (len < 256) |
---|
588 | { |
---|
589 | fprintf(outfile, "** Expression too long - missing delimiter?\n"); |
---|
590 | goto SKIP_DATA; |
---|
591 | } |
---|
592 | |
---|
593 | if (infile == stdin) printf(" > "); |
---|
594 | if (fgets((char *)pp, len, infile) == NULL) |
---|
595 | { |
---|
596 | fprintf(outfile, "** Unexpected EOF\n"); |
---|
597 | done = 1; |
---|
598 | goto CONTINUE; |
---|
599 | } |
---|
600 | if (infile != stdin) fprintf(outfile, "%s", (char *)pp); |
---|
601 | } |
---|
602 | |
---|
603 | /* If the first character after the delimiter is backslash, make |
---|
604 | the pattern end with backslash. This is purely to provide a way |
---|
605 | of testing for the error message when a pattern ends with backslash. */ |
---|
606 | |
---|
607 | if (pp[1] == '\\') *pp++ = '\\'; |
---|
608 | |
---|
609 | /* Terminate the pattern at the delimiter */ |
---|
610 | |
---|
611 | *pp++ = 0; |
---|
612 | |
---|
613 | /* Look for options after final delimiter */ |
---|
614 | |
---|
615 | options = 0; |
---|
616 | study_options = 0; |
---|
617 | log_store = showstore; /* default from command line */ |
---|
618 | |
---|
619 | while (*pp != 0) |
---|
620 | { |
---|
621 | switch (*pp++) |
---|
622 | { |
---|
623 | case 'g': do_g = 1; break; |
---|
624 | case 'i': options |= PCRE_CASELESS; break; |
---|
625 | case 'm': options |= PCRE_MULTILINE; break; |
---|
626 | case 's': options |= PCRE_DOTALL; break; |
---|
627 | case 'x': options |= PCRE_EXTENDED; break; |
---|
628 | |
---|
629 | case '+': do_showrest = 1; break; |
---|
630 | case 'A': options |= PCRE_ANCHORED; break; |
---|
631 | case 'D': do_debug = do_showinfo = 1; break; |
---|
632 | case 'E': options |= PCRE_DOLLAR_ENDONLY; break; |
---|
633 | case 'G': do_G = 1; break; |
---|
634 | case 'I': do_showinfo = 1; break; |
---|
635 | case 'M': log_store = 1; break; |
---|
636 | |
---|
637 | #if !defined NOPOSIX |
---|
638 | case 'P': do_posix = 1; break; |
---|
639 | #endif |
---|
640 | |
---|
641 | case 'S': do_study = 1; break; |
---|
642 | case 'U': options |= PCRE_UNGREEDY; break; |
---|
643 | case 'X': options |= PCRE_EXTRA; break; |
---|
644 | case '8': options |= PCRE_UTF8; utf8 = 1; break; |
---|
645 | |
---|
646 | case 'L': |
---|
647 | ppp = pp; |
---|
648 | while (*ppp != '\n' && *ppp != ' ') ppp++; |
---|
649 | *ppp = 0; |
---|
650 | if (setlocale(LC_CTYPE, (const char *)pp) == NULL) |
---|
651 | { |
---|
652 | fprintf(outfile, "** Failed to set locale \"%s\"\n", pp); |
---|
653 | goto SKIP_DATA; |
---|
654 | } |
---|
655 | tables = pcre_maketables(); |
---|
656 | pp = ppp; |
---|
657 | break; |
---|
658 | |
---|
659 | case '\n': case ' ': break; |
---|
660 | default: |
---|
661 | fprintf(outfile, "** Unknown option '%c'\n", pp[-1]); |
---|
662 | goto SKIP_DATA; |
---|
663 | } |
---|
664 | } |
---|
665 | |
---|
666 | /* Handle compiling via the POSIX interface, which doesn't support the |
---|
667 | timing, showing, or debugging options, nor the ability to pass over |
---|
668 | local character tables. */ |
---|
669 | |
---|
670 | #if !defined NOPOSIX |
---|
671 | if (posix || do_posix) |
---|
672 | { |
---|
673 | int rc; |
---|
674 | int cflags = 0; |
---|
675 | if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE; |
---|
676 | if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE; |
---|
677 | rc = regcomp(&preg, (char *)p, cflags); |
---|
678 | |
---|
679 | /* Compilation failed; go back for another re, skipping to blank line |
---|
680 | if non-interactive. */ |
---|
681 | |
---|
682 | if (rc != 0) |
---|
683 | { |
---|
684 | (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer)); |
---|
685 | fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); |
---|
686 | goto SKIP_DATA; |
---|
687 | } |
---|
688 | } |
---|
689 | |
---|
690 | /* Handle compiling via the native interface */ |
---|
691 | |
---|
692 | else |
---|
693 | #endif /* !defined NOPOSIX */ |
---|
694 | |
---|
695 | { |
---|
696 | if (timeit) |
---|
697 | { |
---|
698 | register int i; |
---|
699 | clock_t time_taken; |
---|
700 | clock_t start_time = clock(); |
---|
701 | for (i = 0; i < LOOPREPEAT; i++) |
---|
702 | { |
---|
703 | re = pcre_compile((char *)p, options, &error, &erroroffset, tables); |
---|
704 | if (re != NULL) free(re); |
---|
705 | } |
---|
706 | time_taken = clock() - start_time; |
---|
707 | fprintf(outfile, "Compile time %.3f milliseconds\n", |
---|
708 | ((double)time_taken * 1000.0) / |
---|
709 | ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); |
---|
710 | } |
---|
711 | |
---|
712 | re = pcre_compile((char *)p, options, &error, &erroroffset, tables); |
---|
713 | |
---|
714 | /* Compilation failed; go back for another re, skipping to blank line |
---|
715 | if non-interactive. */ |
---|
716 | |
---|
717 | if (re == NULL) |
---|
718 | { |
---|
719 | fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset); |
---|
720 | SKIP_DATA: |
---|
721 | if (infile != stdin) |
---|
722 | { |
---|
723 | for (;;) |
---|
724 | { |
---|
725 | if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) |
---|
726 | { |
---|
727 | done = 1; |
---|
728 | goto CONTINUE; |
---|
729 | } |
---|
730 | len = (int)strlen((char *)buffer); |
---|
731 | while (len > 0 && isspace(buffer[len-1])) len--; |
---|
732 | if (len == 0) break; |
---|
733 | } |
---|
734 | fprintf(outfile, "\n"); |
---|
735 | } |
---|
736 | goto CONTINUE; |
---|
737 | } |
---|
738 | |
---|
739 | /* Compilation succeeded; print data if required. There are now two |
---|
740 | info-returning functions. The old one has a limited interface and |
---|
741 | returns only limited data. Check that it agrees with the newer one. */ |
---|
742 | |
---|
743 | if (do_showinfo) |
---|
744 | { |
---|
745 | unsigned long int get_options; |
---|
746 | int old_first_char, old_options, old_count; |
---|
747 | int count, backrefmax, first_char, need_char; |
---|
748 | size_t size; |
---|
749 | |
---|
750 | if (do_debug) print_internals(re); |
---|
751 | |
---|
752 | new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); |
---|
753 | new_info(re, NULL, PCRE_INFO_SIZE, &size); |
---|
754 | new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); |
---|
755 | new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); |
---|
756 | new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char); |
---|
757 | new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char); |
---|
758 | |
---|
759 | old_count = pcre_info(re, &old_options, &old_first_char); |
---|
760 | if (count < 0) fprintf(outfile, |
---|
761 | "Error %d from pcre_info()\n", count); |
---|
762 | else |
---|
763 | { |
---|
764 | if (old_count != count) fprintf(outfile, |
---|
765 | "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count, |
---|
766 | old_count); |
---|
767 | |
---|
768 | if (old_first_char != first_char) fprintf(outfile, |
---|
769 | "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n", |
---|
770 | first_char, old_first_char); |
---|
771 | |
---|
772 | if (old_options != (int)get_options) fprintf(outfile, |
---|
773 | "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n", |
---|
774 | get_options, old_options); |
---|
775 | } |
---|
776 | |
---|
777 | if (size != gotten_store) fprintf(outfile, |
---|
778 | "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n", |
---|
779 | size, gotten_store); |
---|
780 | |
---|
781 | fprintf(outfile, "Capturing subpattern count = %d\n", count); |
---|
782 | if (backrefmax > 0) |
---|
783 | fprintf(outfile, "Max back reference = %d\n", backrefmax); |
---|
784 | if (get_options == 0) fprintf(outfile, "No options\n"); |
---|
785 | else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n", |
---|
786 | ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", |
---|
787 | ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", |
---|
788 | ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", |
---|
789 | ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "", |
---|
790 | ((get_options & PCRE_DOTALL) != 0)? " dotall" : "", |
---|
791 | ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", |
---|
792 | ((get_options & PCRE_EXTRA) != 0)? " extra" : "", |
---|
793 | ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", |
---|
794 | ((get_options & PCRE_UTF8) != 0)? " utf8" : ""); |
---|
795 | |
---|
796 | if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0) |
---|
797 | fprintf(outfile, "Case state changes\n"); |
---|
798 | |
---|
799 | if (first_char == -1) |
---|
800 | { |
---|
801 | fprintf(outfile, "First char at start or follows \\n\n"); |
---|
802 | } |
---|
803 | else if (first_char < 0) |
---|
804 | { |
---|
805 | fprintf(outfile, "No first char\n"); |
---|
806 | } |
---|
807 | else |
---|
808 | { |
---|
809 | if (isprint(first_char)) |
---|
810 | fprintf(outfile, "First char = \'%c\'\n", first_char); |
---|
811 | else |
---|
812 | fprintf(outfile, "First char = %d\n", first_char); |
---|
813 | } |
---|
814 | |
---|
815 | if (need_char < 0) |
---|
816 | { |
---|
817 | fprintf(outfile, "No need char\n"); |
---|
818 | } |
---|
819 | else |
---|
820 | { |
---|
821 | if (isprint(need_char)) |
---|
822 | fprintf(outfile, "Need char = \'%c\'\n", need_char); |
---|
823 | else |
---|
824 | fprintf(outfile, "Need char = %d\n", need_char); |
---|
825 | } |
---|
826 | } |
---|
827 | |
---|
828 | /* If /S was present, study the regexp to generate additional info to |
---|
829 | help with the matching. */ |
---|
830 | |
---|
831 | if (do_study) |
---|
832 | { |
---|
833 | if (timeit) |
---|
834 | { |
---|
835 | register int i; |
---|
836 | clock_t time_taken; |
---|
837 | clock_t start_time = clock(); |
---|
838 | for (i = 0; i < LOOPREPEAT; i++) |
---|
839 | extra = pcre_study(re, study_options, &error); |
---|
840 | time_taken = clock() - start_time; |
---|
841 | if (extra != NULL) free(extra); |
---|
842 | fprintf(outfile, " Study time %.3f milliseconds\n", |
---|
843 | ((double)time_taken * 1000.0)/ |
---|
844 | ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); |
---|
845 | } |
---|
846 | |
---|
847 | extra = pcre_study(re, study_options, &error); |
---|
848 | if (error != NULL) |
---|
849 | fprintf(outfile, "Failed to study: %s\n", error); |
---|
850 | else if (extra == NULL) |
---|
851 | fprintf(outfile, "Study returned NULL\n"); |
---|
852 | |
---|
853 | else if (do_showinfo) |
---|
854 | { |
---|
855 | uschar *start_bits = NULL; |
---|
856 | new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits); |
---|
857 | if (start_bits == NULL) |
---|
858 | fprintf(outfile, "No starting character set\n"); |
---|
859 | else |
---|
860 | { |
---|
861 | int i; |
---|
862 | int c = 24; |
---|
863 | fprintf(outfile, "Starting character set: "); |
---|
864 | for (i = 0; i < 256; i++) |
---|
865 | { |
---|
866 | if ((start_bits[i/8] & (1<<(i%8))) != 0) |
---|
867 | { |
---|
868 | if (c > 75) |
---|
869 | { |
---|
870 | fprintf(outfile, "\n "); |
---|
871 | c = 2; |
---|
872 | } |
---|
873 | if (isprint(i) && i != ' ') |
---|
874 | { |
---|
875 | fprintf(outfile, "%c ", i); |
---|
876 | c += 2; |
---|
877 | } |
---|
878 | else |
---|
879 | { |
---|
880 | fprintf(outfile, "\\x%02x ", i); |
---|
881 | c += 5; |
---|
882 | } |
---|
883 | } |
---|
884 | } |
---|
885 | fprintf(outfile, "\n"); |
---|
886 | } |
---|
887 | } |
---|
888 | } |
---|
889 | } |
---|
890 | |
---|
891 | /* Read data lines and test them */ |
---|
892 | |
---|
893 | for (;;) |
---|
894 | { |
---|
895 | unsigned char *q; |
---|
896 | unsigned char *bptr = dbuffer; |
---|
897 | int *use_offsets = offsets; |
---|
898 | int use_size_offsets = size_offsets; |
---|
899 | int count, c; |
---|
900 | int copystrings = 0; |
---|
901 | int getstrings = 0; |
---|
902 | int getlist = 0; |
---|
903 | int gmatched = 0; |
---|
904 | int start_offset = 0; |
---|
905 | int g_notempty = 0; |
---|
906 | |
---|
907 | options = 0; |
---|
908 | |
---|
909 | if (infile == stdin) printf("data> "); |
---|
910 | if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) |
---|
911 | { |
---|
912 | done = 1; |
---|
913 | goto CONTINUE; |
---|
914 | } |
---|
915 | if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); |
---|
916 | |
---|
917 | len = (int)strlen((char *)buffer); |
---|
918 | while (len > 0 && isspace(buffer[len-1])) len--; |
---|
919 | buffer[len] = 0; |
---|
920 | if (len == 0) break; |
---|
921 | |
---|
922 | p = buffer; |
---|
923 | while (isspace(*p)) p++; |
---|
924 | |
---|
925 | q = dbuffer; |
---|
926 | while ((c = *p++) != 0) |
---|
927 | { |
---|
928 | int i = 0; |
---|
929 | int n = 0; |
---|
930 | if (c == '\\') switch ((c = *p++)) |
---|
931 | { |
---|
932 | case 'a': c = 7; break; |
---|
933 | case 'b': c = '\b'; break; |
---|
934 | case 'e': c = 27; break; |
---|
935 | case 'f': c = '\f'; break; |
---|
936 | case 'n': c = '\n'; break; |
---|
937 | case 'r': c = '\r'; break; |
---|
938 | case 't': c = '\t'; break; |
---|
939 | case 'v': c = '\v'; break; |
---|
940 | |
---|
941 | case '0': case '1': case '2': case '3': |
---|
942 | case '4': case '5': case '6': case '7': |
---|
943 | c -= '0'; |
---|
944 | while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') |
---|
945 | c = c * 8 + *p++ - '0'; |
---|
946 | break; |
---|
947 | |
---|
948 | case 'x': |
---|
949 | |
---|
950 | /* Handle \x{..} specially - new Perl thing for utf8 */ |
---|
951 | |
---|
952 | if (*p == '{') |
---|
953 | { |
---|
954 | unsigned char *pt = p; |
---|
955 | c = 0; |
---|
956 | while (isxdigit(*(++pt))) |
---|
957 | c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W'); |
---|
958 | if (*pt == '}') |
---|
959 | { |
---|
960 | unsigned char buffer[8]; |
---|
961 | int ii, utn; |
---|
962 | utn = ord2utf8(c, buffer); |
---|
963 | for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii]; |
---|
964 | c = buffer[ii]; /* Last byte */ |
---|
965 | p = pt + 1; |
---|
966 | break; |
---|
967 | } |
---|
968 | /* Not correct form; fall through */ |
---|
969 | } |
---|
970 | |
---|
971 | /* Ordinary \x */ |
---|
972 | |
---|
973 | c = 0; |
---|
974 | while (i++ < 2 && isxdigit(*p)) |
---|
975 | { |
---|
976 | c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W'); |
---|
977 | p++; |
---|
978 | } |
---|
979 | break; |
---|
980 | |
---|
981 | case 0: /* Allows for an empty line */ |
---|
982 | p--; |
---|
983 | continue; |
---|
984 | |
---|
985 | case 'A': /* Option setting */ |
---|
986 | options |= PCRE_ANCHORED; |
---|
987 | continue; |
---|
988 | |
---|
989 | case 'B': |
---|
990 | options |= PCRE_NOTBOL; |
---|
991 | continue; |
---|
992 | |
---|
993 | case 'C': |
---|
994 | while(isdigit(*p)) n = n * 10 + *p++ - '0'; |
---|
995 | copystrings |= 1 << n; |
---|
996 | continue; |
---|
997 | |
---|
998 | case 'G': |
---|
999 | while(isdigit(*p)) n = n * 10 + *p++ - '0'; |
---|
1000 | getstrings |= 1 << n; |
---|
1001 | continue; |
---|
1002 | |
---|
1003 | case 'L': |
---|
1004 | getlist = 1; |
---|
1005 | continue; |
---|
1006 | |
---|
1007 | case 'N': |
---|
1008 | options |= PCRE_NOTEMPTY; |
---|
1009 | continue; |
---|
1010 | |
---|
1011 | case 'O': |
---|
1012 | while(isdigit(*p)) n = n * 10 + *p++ - '0'; |
---|
1013 | if (n > size_offsets_max) |
---|
1014 | { |
---|
1015 | size_offsets_max = n; |
---|
1016 | free(offsets); |
---|
1017 | use_offsets = offsets = malloc(size_offsets_max * sizeof(int)); |
---|
1018 | if (offsets == NULL) |
---|
1019 | { |
---|
1020 | printf("** Failed to get %d bytes of memory for offsets vector\n", |
---|
1021 | size_offsets_max * sizeof(int)); |
---|
1022 | return 1; |
---|
1023 | } |
---|
1024 | } |
---|
1025 | use_size_offsets = n; |
---|
1026 | if (n == 0) use_offsets = NULL; |
---|
1027 | continue; |
---|
1028 | |
---|
1029 | case 'Z': |
---|
1030 | options |= PCRE_NOTEOL; |
---|
1031 | continue; |
---|
1032 | } |
---|
1033 | *q++ = c; |
---|
1034 | } |
---|
1035 | *q = 0; |
---|
1036 | len = q - dbuffer; |
---|
1037 | |
---|
1038 | /* Handle matching via the POSIX interface, which does not |
---|
1039 | support timing. */ |
---|
1040 | |
---|
1041 | #if !defined NOPOSIX |
---|
1042 | if (posix || do_posix) |
---|
1043 | { |
---|
1044 | int rc; |
---|
1045 | int eflags = 0; |
---|
1046 | regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets); |
---|
1047 | if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; |
---|
1048 | if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; |
---|
1049 | |
---|
1050 | rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); |
---|
1051 | |
---|
1052 | if (rc != 0) |
---|
1053 | { |
---|
1054 | (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer)); |
---|
1055 | fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); |
---|
1056 | } |
---|
1057 | else |
---|
1058 | { |
---|
1059 | size_t i; |
---|
1060 | for (i = 0; i < use_size_offsets; i++) |
---|
1061 | { |
---|
1062 | if (pmatch[i].rm_so >= 0) |
---|
1063 | { |
---|
1064 | fprintf(outfile, "%2d: ", (int)i); |
---|
1065 | pchars(dbuffer + pmatch[i].rm_so, |
---|
1066 | pmatch[i].rm_eo - pmatch[i].rm_so, utf8); |
---|
1067 | fprintf(outfile, "\n"); |
---|
1068 | if (i == 0 && do_showrest) |
---|
1069 | { |
---|
1070 | fprintf(outfile, " 0+ "); |
---|
1071 | pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8); |
---|
1072 | fprintf(outfile, "\n"); |
---|
1073 | } |
---|
1074 | } |
---|
1075 | } |
---|
1076 | } |
---|
1077 | free(pmatch); |
---|
1078 | } |
---|
1079 | |
---|
1080 | /* Handle matching via the native interface - repeats for /g and /G */ |
---|
1081 | |
---|
1082 | else |
---|
1083 | #endif /* !defined NOPOSIX */ |
---|
1084 | |
---|
1085 | for (;; gmatched++) /* Loop for /g or /G */ |
---|
1086 | { |
---|
1087 | if (timeit) |
---|
1088 | { |
---|
1089 | register int i; |
---|
1090 | clock_t time_taken; |
---|
1091 | clock_t start_time = clock(); |
---|
1092 | for (i = 0; i < LOOPREPEAT; i++) |
---|
1093 | count = pcre_exec(re, extra, (char *)bptr, len, |
---|
1094 | start_offset, options | g_notempty, use_offsets, use_size_offsets); |
---|
1095 | time_taken = clock() - start_time; |
---|
1096 | fprintf(outfile, "Execute time %.3f milliseconds\n", |
---|
1097 | ((double)time_taken * 1000.0)/ |
---|
1098 | ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); |
---|
1099 | } |
---|
1100 | |
---|
1101 | count = pcre_exec(re, extra, (char *)bptr, len, |
---|
1102 | start_offset, options | g_notempty, use_offsets, use_size_offsets); |
---|
1103 | |
---|
1104 | if (count == 0) |
---|
1105 | { |
---|
1106 | fprintf(outfile, "Matched, but too many substrings\n"); |
---|
1107 | count = use_size_offsets/3; |
---|
1108 | } |
---|
1109 | |
---|
1110 | /* Matched */ |
---|
1111 | |
---|
1112 | if (count >= 0) |
---|
1113 | { |
---|
1114 | int i; |
---|
1115 | for (i = 0; i < count * 2; i += 2) |
---|
1116 | { |
---|
1117 | if (use_offsets[i] < 0) |
---|
1118 | fprintf(outfile, "%2d: <unset>\n", i/2); |
---|
1119 | else |
---|
1120 | { |
---|
1121 | fprintf(outfile, "%2d: ", i/2); |
---|
1122 | pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8); |
---|
1123 | fprintf(outfile, "\n"); |
---|
1124 | if (i == 0) |
---|
1125 | { |
---|
1126 | if (do_showrest) |
---|
1127 | { |
---|
1128 | fprintf(outfile, " 0+ "); |
---|
1129 | pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8); |
---|
1130 | fprintf(outfile, "\n"); |
---|
1131 | } |
---|
1132 | } |
---|
1133 | } |
---|
1134 | } |
---|
1135 | |
---|
1136 | for (i = 0; i < 32; i++) |
---|
1137 | { |
---|
1138 | if ((copystrings & (1 << i)) != 0) |
---|
1139 | { |
---|
1140 | char copybuffer[16]; |
---|
1141 | int rc = pcre_copy_substring((char *)bptr, use_offsets, count, |
---|
1142 | i, copybuffer, sizeof(copybuffer)); |
---|
1143 | if (rc < 0) |
---|
1144 | fprintf(outfile, "copy substring %d failed %d\n", i, rc); |
---|
1145 | else |
---|
1146 | fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc); |
---|
1147 | } |
---|
1148 | } |
---|
1149 | |
---|
1150 | for (i = 0; i < 32; i++) |
---|
1151 | { |
---|
1152 | if ((getstrings & (1 << i)) != 0) |
---|
1153 | { |
---|
1154 | const char *substring; |
---|
1155 | int rc = pcre_get_substring((char *)bptr, use_offsets, count, |
---|
1156 | i, &substring); |
---|
1157 | if (rc < 0) |
---|
1158 | fprintf(outfile, "get substring %d failed %d\n", i, rc); |
---|
1159 | else |
---|
1160 | { |
---|
1161 | fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc); |
---|
1162 | /* free((void *)substring); */ |
---|
1163 | pcre_free_substring(substring); |
---|
1164 | } |
---|
1165 | } |
---|
1166 | } |
---|
1167 | |
---|
1168 | if (getlist) |
---|
1169 | { |
---|
1170 | const char **stringlist; |
---|
1171 | int rc = pcre_get_substring_list((char *)bptr, use_offsets, count, |
---|
1172 | &stringlist); |
---|
1173 | if (rc < 0) |
---|
1174 | fprintf(outfile, "get substring list failed %d\n", rc); |
---|
1175 | else |
---|
1176 | { |
---|
1177 | for (i = 0; i < count; i++) |
---|
1178 | fprintf(outfile, "%2dL %s\n", i, stringlist[i]); |
---|
1179 | if (stringlist[i] != NULL) |
---|
1180 | fprintf(outfile, "string list not terminated by NULL\n"); |
---|
1181 | /* free((void *)stringlist); */ |
---|
1182 | pcre_free_substring_list(stringlist); |
---|
1183 | } |
---|
1184 | } |
---|
1185 | } |
---|
1186 | |
---|
1187 | /* Failed to match. If this is a /g or /G loop and we previously set |
---|
1188 | g_notempty after a null match, this is not necessarily the end. |
---|
1189 | We want to advance the start offset, and continue. Fudge the offset |
---|
1190 | values to achieve this. We won't be at the end of the string - that |
---|
1191 | was checked before setting g_notempty. */ |
---|
1192 | |
---|
1193 | else |
---|
1194 | { |
---|
1195 | if (g_notempty != 0) |
---|
1196 | { |
---|
1197 | use_offsets[0] = start_offset; |
---|
1198 | use_offsets[1] = start_offset + 1; |
---|
1199 | } |
---|
1200 | else |
---|
1201 | { |
---|
1202 | if (gmatched == 0) /* Error if no previous matches */ |
---|
1203 | { |
---|
1204 | if (count == -1) fprintf(outfile, "No match\n"); |
---|
1205 | else fprintf(outfile, "Error %d\n", count); |
---|
1206 | } |
---|
1207 | break; /* Out of the /g loop */ |
---|
1208 | } |
---|
1209 | } |
---|
1210 | |
---|
1211 | /* If not /g or /G we are done */ |
---|
1212 | |
---|
1213 | if (!do_g && !do_G) break; |
---|
1214 | |
---|
1215 | /* If we have matched an empty string, first check to see if we are at |
---|
1216 | the end of the subject. If so, the /g loop is over. Otherwise, mimic |
---|
1217 | what Perl's /g option does. This turns out to be rather cunning. First |
---|
1218 | we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the |
---|
1219 | same point. If this fails (picked up above) we advance to the next |
---|
1220 | character. */ |
---|
1221 | |
---|
1222 | g_notempty = 0; |
---|
1223 | if (use_offsets[0] == use_offsets[1]) |
---|
1224 | { |
---|
1225 | if (use_offsets[0] == len) break; |
---|
1226 | g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; |
---|
1227 | } |
---|
1228 | |
---|
1229 | /* For /g, update the start offset, leaving the rest alone */ |
---|
1230 | |
---|
1231 | if (do_g) start_offset = use_offsets[1]; |
---|
1232 | |
---|
1233 | /* For /G, update the pointer and length */ |
---|
1234 | |
---|
1235 | else |
---|
1236 | { |
---|
1237 | bptr += use_offsets[1]; |
---|
1238 | len -= use_offsets[1]; |
---|
1239 | } |
---|
1240 | } /* End of loop for /g and /G */ |
---|
1241 | } /* End of loop for data lines */ |
---|
1242 | |
---|
1243 | CONTINUE: |
---|
1244 | |
---|
1245 | #if !defined NOPOSIX |
---|
1246 | if (posix || do_posix) regfree(&preg); |
---|
1247 | #endif |
---|
1248 | |
---|
1249 | if (re != NULL) free(re); |
---|
1250 | if (extra != NULL) free(extra); |
---|
1251 | if (tables != NULL) |
---|
1252 | { |
---|
1253 | free((void *)tables); |
---|
1254 | setlocale(LC_CTYPE, "C"); |
---|
1255 | } |
---|
1256 | } |
---|
1257 | |
---|
1258 | fprintf(outfile, "\n"); |
---|
1259 | return 0; |
---|
1260 | } |
---|
1261 | |
---|
1262 | /* End */ |
---|