1 | /* Copyright (C) 1995, 1996 Tom Lord |
---|
2 | * |
---|
3 | * This program is free software; you can redistribute it and/or modify |
---|
4 | * it under the terms of the GNU Library General Public License as published by |
---|
5 | * the Free Software Foundation; either version 2, or (at your option) |
---|
6 | * any later version. |
---|
7 | * |
---|
8 | * This program is distributed in the hope that it will be useful, |
---|
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
11 | * GNU Library General Public License for more details. |
---|
12 | * |
---|
13 | * You should have received a copy of the GNU Library General Public License |
---|
14 | * along with this software; see the file COPYING. If not, write to |
---|
15 | * the Free Software Foundation, 59 Temple Place - Suite 330, |
---|
16 | * Boston, MA 02111-1307, USA. |
---|
17 | */ |
---|
18 | |
---|
19 | |
---|
20 | |
---|
21 | #include "rxall.h" |
---|
22 | #include "rxposix.h" |
---|
23 | #include "rxgnucomp.h" |
---|
24 | #include "rxbasic.h" |
---|
25 | #include "rxsimp.h" |
---|
26 | |
---|
27 | /* regcomp takes a regular expression as a string and compiles it. |
---|
28 | * |
---|
29 | * PATTERN is the address of the pattern string. |
---|
30 | * |
---|
31 | * CFLAGS is a series of bits which affect compilation. |
---|
32 | * |
---|
33 | * If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we |
---|
34 | * use POSIX basic syntax. |
---|
35 | * |
---|
36 | * If REG_NEWLINE is set, then . and [^...] don't match newline. |
---|
37 | * Also, regexec will try a match beginning after every newline. |
---|
38 | * |
---|
39 | * If REG_ICASE is set, then we considers upper- and lowercase |
---|
40 | * versions of letters to be equivalent when matching. |
---|
41 | * |
---|
42 | * If REG_NOSUB is set, then when PREG is passed to regexec, that |
---|
43 | * routine will report only success or failure, and nothing about the |
---|
44 | * registers. |
---|
45 | * |
---|
46 | * It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for |
---|
47 | * the return codes and their meanings.) |
---|
48 | */ |
---|
49 | |
---|
50 | |
---|
51 | #ifdef __STDC__ |
---|
52 | int |
---|
53 | regncomp (regex_t * preg, const char * pattern, int len, int cflags) |
---|
54 | #else |
---|
55 | int |
---|
56 | regncomp (preg, pattern, len, cflags) |
---|
57 | regex_t * preg; |
---|
58 | const char * pattern; |
---|
59 | int len; |
---|
60 | int cflags; |
---|
61 | #endif |
---|
62 | { |
---|
63 | int ret; |
---|
64 | unsigned int syntax; |
---|
65 | |
---|
66 | rx_bzero ((char *)preg, sizeof (*preg)); |
---|
67 | syntax = ((cflags & REG_EXTENDED) |
---|
68 | ? RE_SYNTAX_POSIX_EXTENDED |
---|
69 | : RE_SYNTAX_POSIX_BASIC); |
---|
70 | |
---|
71 | if (!(cflags & REG_ICASE)) |
---|
72 | preg->translate = 0; |
---|
73 | else |
---|
74 | { |
---|
75 | unsigned i; |
---|
76 | |
---|
77 | preg->translate = (unsigned char *) malloc (256); |
---|
78 | if (!preg->translate) |
---|
79 | return (int) REG_ESPACE; |
---|
80 | |
---|
81 | /* Map uppercase characters to corresponding lowercase ones. */ |
---|
82 | for (i = 0; i < CHAR_SET_SIZE; i++) |
---|
83 | preg->translate[i] = isupper (i) ? tolower (i) : i; |
---|
84 | } |
---|
85 | |
---|
86 | |
---|
87 | /* If REG_NEWLINE is set, newlines are treated differently. */ |
---|
88 | if (!(cflags & REG_NEWLINE)) |
---|
89 | preg->newline_anchor = 0; |
---|
90 | else |
---|
91 | { |
---|
92 | /* REG_NEWLINE implies neither . nor [^...] match newline. */ |
---|
93 | syntax &= ~RE_DOT_NEWLINE; |
---|
94 | syntax |= RE_HAT_LISTS_NOT_NEWLINE; |
---|
95 | /* It also changes the matching behavior. */ |
---|
96 | preg->newline_anchor = 1; |
---|
97 | } |
---|
98 | |
---|
99 | preg->no_sub = !!(cflags & REG_NOSUB); |
---|
100 | |
---|
101 | ret = rx_parse (&preg->pattern, |
---|
102 | pattern, len, |
---|
103 | syntax, |
---|
104 | 256, |
---|
105 | preg->translate); |
---|
106 | |
---|
107 | /* POSIX doesn't distinguish between an unmatched open-group and an |
---|
108 | * unmatched close-group: both are REG_EPAREN. |
---|
109 | */ |
---|
110 | if (ret == REG_ERPAREN) |
---|
111 | ret = REG_EPAREN; |
---|
112 | |
---|
113 | if (!ret) |
---|
114 | { |
---|
115 | preg->re_nsub = 1; |
---|
116 | preg->subexps = 0; |
---|
117 | rx_posix_analyze_rexp (&preg->subexps, |
---|
118 | &preg->re_nsub, |
---|
119 | preg->pattern, |
---|
120 | 0); |
---|
121 | preg->is_nullable = rx_fill_in_fastmap (256, |
---|
122 | preg->fastmap, |
---|
123 | preg->pattern); |
---|
124 | |
---|
125 | preg->is_anchored = rx_is_anchored_p (preg->pattern); |
---|
126 | } |
---|
127 | |
---|
128 | return (int) ret; |
---|
129 | } |
---|
130 | |
---|
131 | |
---|
132 | #ifdef __STDC__ |
---|
133 | int |
---|
134 | regcomp (regex_t * preg, const char * pattern, int cflags) |
---|
135 | #else |
---|
136 | int |
---|
137 | regcomp (preg, pattern, cflags) |
---|
138 | regex_t * preg; |
---|
139 | const char * pattern; |
---|
140 | int cflags; |
---|
141 | #endif |
---|
142 | { |
---|
143 | /* POSIX says a null character in the pattern terminates it, so we |
---|
144 | * can use strlen here in compiling the pattern. |
---|
145 | */ |
---|
146 | |
---|
147 | return regncomp (preg, pattern, strlen (pattern), cflags); |
---|
148 | } |
---|
149 | |
---|
150 | |
---|
151 | |
---|
152 | |
---|
153 | /* Returns a message corresponding to an error code, ERRCODE, returned |
---|
154 | from either regcomp or regexec. */ |
---|
155 | |
---|
156 | #ifdef __STDC__ |
---|
157 | size_t |
---|
158 | regerror (int errcode, const regex_t *preg, |
---|
159 | char *errbuf, size_t errbuf_size) |
---|
160 | #else |
---|
161 | size_t |
---|
162 | regerror (errcode, preg, errbuf, errbuf_size) |
---|
163 | int errcode; |
---|
164 | const regex_t *preg; |
---|
165 | char *errbuf; |
---|
166 | size_t errbuf_size; |
---|
167 | #endif |
---|
168 | { |
---|
169 | const char *msg; |
---|
170 | size_t msg_size; |
---|
171 | |
---|
172 | msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode]; |
---|
173 | msg_size = strlen (msg) + 1; /* Includes the 0. */ |
---|
174 | if (errbuf_size != 0) |
---|
175 | { |
---|
176 | if (msg_size > errbuf_size) |
---|
177 | { |
---|
178 | strncpy (errbuf, msg, errbuf_size - 1); |
---|
179 | errbuf[errbuf_size - 1] = 0; |
---|
180 | } |
---|
181 | else |
---|
182 | strcpy (errbuf, msg); |
---|
183 | } |
---|
184 | return msg_size; |
---|
185 | } |
---|
186 | |
---|
187 | |
---|
188 | |
---|
189 | #ifdef __STDC__ |
---|
190 | int |
---|
191 | rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string) |
---|
192 | #else |
---|
193 | int |
---|
194 | rx_regmatch (pmatch, preg, rules, start, end, string) |
---|
195 | regmatch_t pmatch[]; |
---|
196 | const regex_t *preg; |
---|
197 | struct rx_context_rules * rules; |
---|
198 | int start; |
---|
199 | int end; |
---|
200 | const char *string; |
---|
201 | #endif |
---|
202 | { |
---|
203 | struct rx_solutions * solutions; |
---|
204 | enum rx_answers answer; |
---|
205 | struct rx_context_rules local_rules; |
---|
206 | int orig_end; |
---|
207 | int end_lower_bound; |
---|
208 | int end_upper_bound; |
---|
209 | |
---|
210 | local_rules = *rules; |
---|
211 | orig_end = end; |
---|
212 | |
---|
213 | if (!preg->pattern) |
---|
214 | { |
---|
215 | end_lower_bound = start; |
---|
216 | end_upper_bound = start; |
---|
217 | } |
---|
218 | else if (preg->pattern->len >= 0) |
---|
219 | { |
---|
220 | end_lower_bound = start + preg->pattern->len; |
---|
221 | end_upper_bound = start + preg->pattern->len; |
---|
222 | } |
---|
223 | else |
---|
224 | { |
---|
225 | end_lower_bound = start; |
---|
226 | end_upper_bound = end; |
---|
227 | } |
---|
228 | end = end_upper_bound; |
---|
229 | while (end >= end_lower_bound) |
---|
230 | { |
---|
231 | local_rules.not_eol = (rules->not_eol |
---|
232 | ? ( (end == orig_end) |
---|
233 | || !local_rules.newline_anchor |
---|
234 | || (string[end] != '\n')) |
---|
235 | : ( (end != orig_end) |
---|
236 | && (!local_rules.newline_anchor |
---|
237 | || (string[end] != '\n')))); |
---|
238 | solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps, |
---|
239 | start, end, &local_rules, string); |
---|
240 | if (!solutions) |
---|
241 | return REG_ESPACE; |
---|
242 | |
---|
243 | answer = rx_next_solution (solutions); |
---|
244 | |
---|
245 | if (answer == rx_yes) |
---|
246 | { |
---|
247 | if (pmatch) |
---|
248 | { |
---|
249 | pmatch[0].rm_so = start; |
---|
250 | pmatch[0].rm_eo = end; |
---|
251 | pmatch[0].final_tag = solutions->final_tag; |
---|
252 | } |
---|
253 | rx_basic_free_solutions (solutions); |
---|
254 | return 0; |
---|
255 | } |
---|
256 | else |
---|
257 | rx_basic_free_solutions (solutions); |
---|
258 | |
---|
259 | --end; |
---|
260 | } |
---|
261 | |
---|
262 | switch (answer) |
---|
263 | { |
---|
264 | default: |
---|
265 | case rx_bogus: |
---|
266 | return REG_ESPACE; |
---|
267 | |
---|
268 | case rx_no: |
---|
269 | return REG_NOMATCH; |
---|
270 | } |
---|
271 | } |
---|
272 | |
---|
273 | |
---|
274 | #ifdef __STDC__ |
---|
275 | int |
---|
276 | rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string) |
---|
277 | #else |
---|
278 | int |
---|
279 | rx_regexec (pmatch, preg, rules, start, end, string) |
---|
280 | regmatch_t pmatch[]; |
---|
281 | const regex_t *preg; |
---|
282 | struct rx_context_rules * rules; |
---|
283 | int start; |
---|
284 | int end; |
---|
285 | const char *string; |
---|
286 | #endif |
---|
287 | { |
---|
288 | int x; |
---|
289 | int stat; |
---|
290 | int anchored; |
---|
291 | struct rexp_node * simplified; |
---|
292 | struct rx_unfa * unfa; |
---|
293 | struct rx_classical_system machine; |
---|
294 | |
---|
295 | anchored = preg->is_anchored; |
---|
296 | |
---|
297 | unfa = 0; |
---|
298 | if ((end - start) > RX_MANY_CASES) |
---|
299 | { |
---|
300 | if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps)) |
---|
301 | return REG_ESPACE; |
---|
302 | unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256); |
---|
303 | if (!unfa) |
---|
304 | { |
---|
305 | rx_free_rexp (simplified); |
---|
306 | return REG_ESPACE; |
---|
307 | } |
---|
308 | rx_init_system (&machine, unfa->nfa); |
---|
309 | rx_free_rexp (simplified); |
---|
310 | } |
---|
311 | |
---|
312 | for (x = start; x <= end; ++x) |
---|
313 | { |
---|
314 | if (preg->is_nullable |
---|
315 | || ((x < end) |
---|
316 | && (preg->fastmap[((unsigned char *)string)[x]]))) |
---|
317 | { |
---|
318 | if ((end - start) > RX_MANY_CASES) |
---|
319 | { |
---|
320 | int amt; |
---|
321 | if (rx_start_superstate (&machine) != rx_yes) |
---|
322 | { |
---|
323 | rx_free_unfa (unfa); |
---|
324 | return REG_ESPACE; |
---|
325 | } |
---|
326 | amt = rx_advance_to_final (&machine, string + x, end - start - x); |
---|
327 | if (!machine.final_tag && (amt < (end - start - x))) |
---|
328 | goto nomatch; |
---|
329 | } |
---|
330 | stat = rx_regmatch (pmatch, preg, rules, x, end, string); |
---|
331 | if (!stat || (stat != REG_NOMATCH)) |
---|
332 | { |
---|
333 | rx_free_unfa (unfa); |
---|
334 | return stat; |
---|
335 | } |
---|
336 | } |
---|
337 | nomatch: |
---|
338 | if (anchored) |
---|
339 | if (!preg->newline_anchor) |
---|
340 | { |
---|
341 | rx_free_unfa (unfa); |
---|
342 | return REG_NOMATCH; |
---|
343 | } |
---|
344 | else |
---|
345 | while (x < end) |
---|
346 | if (string[x] == '\n') |
---|
347 | break; |
---|
348 | else |
---|
349 | ++x; |
---|
350 | } |
---|
351 | rx_free_unfa (unfa); |
---|
352 | return REG_NOMATCH; |
---|
353 | } |
---|
354 | |
---|
355 | |
---|
356 | |
---|
357 | /* regexec searches for a given pattern, specified by PREG, in the |
---|
358 | * string STRING. |
---|
359 | * |
---|
360 | * If NMATCH is zero or REG_NOSUB was set in the cflags argument to |
---|
361 | * `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at |
---|
362 | * least NMATCH elements, and we set them to the offsets of the |
---|
363 | * corresponding matched substrings. |
---|
364 | * |
---|
365 | * EFLAGS specifies `execution flags' which affect matching: if |
---|
366 | * REG_NOTBOL is set, then ^ does not match at the beginning of the |
---|
367 | * string; if REG_NOTEOL is set, then $ does not match at the end. |
---|
368 | * |
---|
369 | * We return 0 if we find a match and REG_NOMATCH if not. |
---|
370 | */ |
---|
371 | |
---|
372 | #ifdef __STDC__ |
---|
373 | int |
---|
374 | regnexec (const regex_t *preg, const char *string, int len, size_t nmatch, regmatch_t **pmatch, int eflags) |
---|
375 | #else |
---|
376 | int |
---|
377 | regnexec (preg, string, len, nmatch, pmatch, eflags) |
---|
378 | const regex_t *preg; |
---|
379 | const char *string; |
---|
380 | int len; |
---|
381 | size_t nmatch; |
---|
382 | regmatch_t **pmatch; |
---|
383 | int eflags; |
---|
384 | #endif |
---|
385 | { |
---|
386 | int want_reg_info; |
---|
387 | struct rx_context_rules rules; |
---|
388 | regmatch_t * regs; |
---|
389 | size_t nregs; |
---|
390 | int stat; |
---|
391 | |
---|
392 | want_reg_info = (!preg->no_sub && (nmatch > 0)); |
---|
393 | |
---|
394 | rules.newline_anchor = preg->newline_anchor; |
---|
395 | rules.not_bol = !!(eflags & REG_NOTBOL); |
---|
396 | rules.not_eol = !!(eflags & REG_NOTEOL); |
---|
397 | rules.case_indep = !!(eflags & REG_ICASE); |
---|
398 | |
---|
399 | if (nmatch >= preg->re_nsub) |
---|
400 | { |
---|
401 | regs = *pmatch; |
---|
402 | nregs = nmatch; |
---|
403 | } |
---|
404 | else |
---|
405 | { |
---|
406 | regs = (regmatch_t *)malloc (preg->re_nsub * sizeof (*regs)); |
---|
407 | if (!regs) |
---|
408 | return REG_ESPACE; |
---|
409 | nregs = preg->re_nsub; |
---|
410 | } |
---|
411 | |
---|
412 | { |
---|
413 | int x; |
---|
414 | for (x = 0; x < nregs; ++x) |
---|
415 | regs[x].rm_so = regs[x].rm_eo = -1; |
---|
416 | } |
---|
417 | |
---|
418 | |
---|
419 | stat = rx_regexec (regs, preg, &rules, 0, len, string); |
---|
420 | |
---|
421 | if (!stat && want_reg_info && pmatch && (regs != *pmatch)) |
---|
422 | { |
---|
423 | size_t x; |
---|
424 | for (x = 0; x < nmatch; ++x) |
---|
425 | (*pmatch)[x] = regs[x]; |
---|
426 | } |
---|
427 | |
---|
428 | if (!stat && (eflags & REG_ALLOC_REGS)) |
---|
429 | *pmatch = regs; |
---|
430 | else if (regs && (!pmatch || (regs != *pmatch))) |
---|
431 | free (regs); |
---|
432 | |
---|
433 | return stat; |
---|
434 | } |
---|
435 | |
---|
436 | #ifdef __STDC__ |
---|
437 | int |
---|
438 | regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) |
---|
439 | #else |
---|
440 | int |
---|
441 | regexec (preg, string, nmatch, pmatch, eflags) |
---|
442 | const regex_t *preg; |
---|
443 | const char *string; |
---|
444 | size_t nmatch; |
---|
445 | regmatch_t pmatch[]; |
---|
446 | int eflags; |
---|
447 | #endif |
---|
448 | { |
---|
449 | return regnexec (preg, |
---|
450 | string, |
---|
451 | strlen (string), |
---|
452 | nmatch, |
---|
453 | &pmatch, |
---|
454 | (eflags & ~REG_ALLOC_REGS)); |
---|
455 | } |
---|
456 | |
---|
457 | |
---|
458 | /* Free dynamically allocated space used by PREG. */ |
---|
459 | |
---|
460 | #ifdef __STDC__ |
---|
461 | void |
---|
462 | regfree (regex_t *preg) |
---|
463 | #else |
---|
464 | void |
---|
465 | regfree (preg) |
---|
466 | regex_t *preg; |
---|
467 | #endif |
---|
468 | { |
---|
469 | if (preg->pattern) |
---|
470 | { |
---|
471 | rx_free_rexp (preg->pattern); |
---|
472 | preg->pattern = 0; |
---|
473 | } |
---|
474 | if (preg->subexps) |
---|
475 | { |
---|
476 | free (preg->subexps); |
---|
477 | preg->subexps = 0; |
---|
478 | } |
---|
479 | if (preg->translate != 0) |
---|
480 | { |
---|
481 | free (preg->translate); |
---|
482 | preg->translate = 0; |
---|
483 | } |
---|
484 | } |
---|