1 | /************************************************* |
---|
2 | * Perl-Compatible Regular Expressions * |
---|
3 | *************************************************/ |
---|
4 | |
---|
5 | /* |
---|
6 | This is a library of functions to support regular expressions whose syntax |
---|
7 | and semantics are as close as possible to those of the Perl 5 language. See |
---|
8 | the file Tech.Notes for some information on the internals. |
---|
9 | |
---|
10 | This module is a wrapper that provides a POSIX API to the underlying PCRE |
---|
11 | functions. |
---|
12 | |
---|
13 | Written by: Philip Hazel <ph10@cam.ac.uk> |
---|
14 | |
---|
15 | Copyright (c) 1997-2001 University of Cambridge |
---|
16 | |
---|
17 | ----------------------------------------------------------------------------- |
---|
18 | Permission is granted to anyone to use this software for any purpose on any |
---|
19 | computer system, and to redistribute it freely, subject to the following |
---|
20 | restrictions: |
---|
21 | |
---|
22 | 1. This software is distributed in the hope that it will be useful, |
---|
23 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
24 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
---|
25 | |
---|
26 | 2. The origin of this software must not be misrepresented, either by |
---|
27 | explicit claim or by omission. |
---|
28 | |
---|
29 | 3. Altered versions must be plainly marked as such, and must not be |
---|
30 | misrepresented as being the original software. |
---|
31 | |
---|
32 | 4. If PCRE is embedded in any software that is released under the GNU |
---|
33 | General Purpose Licence (GPL), then the terms of that licence shall |
---|
34 | supersede any condition above with which it is incompatible. |
---|
35 | ----------------------------------------------------------------------------- |
---|
36 | */ |
---|
37 | |
---|
#include "internal.h"
#include "pcreposix.h"
#include "stdlib.h"
#include <limits.h>
---|
41 | |
---|
42 | |
---|
43 | |
---|
44 | /* Corresponding tables of PCRE error messages and POSIX error codes. */ |
---|
45 | |
---|
46 | static const char *estring[] = { |
---|
47 | ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, |
---|
48 | ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, |
---|
49 | ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30, |
---|
50 | ERR31 }; |
---|
51 | |
---|
52 | static int eint[] = { |
---|
53 | REG_EESCAPE, /* "\\ at end of pattern" */ |
---|
54 | REG_EESCAPE, /* "\\c at end of pattern" */ |
---|
55 | REG_EESCAPE, /* "unrecognized character follows \\" */ |
---|
56 | REG_BADBR, /* "numbers out of order in {} quantifier" */ |
---|
57 | REG_BADBR, /* "number too big in {} quantifier" */ |
---|
58 | REG_EBRACK, /* "missing terminating ] for character class" */ |
---|
59 | REG_ECTYPE, /* "invalid escape sequence in character class" */ |
---|
60 | REG_ERANGE, /* "range out of order in character class" */ |
---|
61 | REG_BADRPT, /* "nothing to repeat" */ |
---|
62 | REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */ |
---|
63 | REG_ASSERT, /* "internal error: unexpected repeat" */ |
---|
64 | REG_BADPAT, /* "unrecognized character after (?" */ |
---|
65 | REG_ASSERT, /* "unused error" */ |
---|
66 | REG_EPAREN, /* "missing )" */ |
---|
67 | REG_ESUBREG, /* "back reference to non-existent subpattern" */ |
---|
68 | REG_INVARG, /* "erroffset passed as NULL" */ |
---|
69 | REG_INVARG, /* "unknown option bit(s) set" */ |
---|
70 | REG_EPAREN, /* "missing ) after comment" */ |
---|
71 | REG_ESIZE, /* "parentheses nested too deeply" */ |
---|
72 | REG_ESIZE, /* "regular expression too large" */ |
---|
73 | REG_ESPACE, /* "failed to get memory" */ |
---|
74 | REG_EPAREN, /* "unmatched brackets" */ |
---|
75 | REG_ASSERT, /* "internal error: code overflow" */ |
---|
76 | REG_BADPAT, /* "unrecognized character after (?<" */ |
---|
77 | REG_BADPAT, /* "lookbehind assertion is not fixed length" */ |
---|
78 | REG_BADPAT, /* "malformed number after (?(" */ |
---|
79 | REG_BADPAT, /* "conditional group containe more than two branches" */ |
---|
80 | REG_BADPAT, /* "assertion expected after (?(" */ |
---|
81 | REG_BADPAT, /* "(?p must be followed by )" */ |
---|
82 | REG_ECTYPE, /* "unknown POSIX class name" */ |
---|
83 | REG_BADPAT, /* "POSIX collating elements are not supported" */ |
---|
84 | REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */ |
---|
85 | REG_BADPAT, /* "characters with values > 255 are not yet supported in classes" */ |
---|
86 | REG_BADPAT, /* "character value in \x{...} sequence is too large" */ |
---|
87 | REG_BADPAT /* "invalid condition (?(0)" */ |
---|
88 | }; |
---|
89 | |
---|
90 | /* Table of texts corresponding to POSIX error codes */ |
---|
91 | |
---|
92 | static const char *pstring[] = { |
---|
93 | "", /* Dummy for value 0 */ |
---|
94 | "internal error", /* REG_ASSERT */ |
---|
95 | "invalid repeat counts in {}", /* BADBR */ |
---|
96 | "pattern error", /* BADPAT */ |
---|
97 | "? * + invalid", /* BADRPT */ |
---|
98 | "unbalanced {}", /* EBRACE */ |
---|
99 | "unbalanced []", /* EBRACK */ |
---|
100 | "collation error - not relevant", /* ECOLLATE */ |
---|
101 | "bad class", /* ECTYPE */ |
---|
102 | "bad escape sequence", /* EESCAPE */ |
---|
103 | "empty expression", /* EMPTY */ |
---|
104 | "unbalanced ()", /* EPAREN */ |
---|
105 | "bad range inside []", /* ERANGE */ |
---|
106 | "expression too big", /* ESIZE */ |
---|
107 | "failed to get memory", /* ESPACE */ |
---|
108 | "bad back reference", /* ESUBREG */ |
---|
109 | "bad argument", /* INVARG */ |
---|
110 | "match failed" /* NOMATCH */ |
---|
111 | }; |
---|
112 | |
---|
113 | |
---|
114 | |
---|
115 | |
---|
116 | /************************************************* |
---|
117 | * Translate PCRE text code to int * |
---|
118 | *************************************************/ |
---|
119 | |
---|
120 | /* PCRE compile-time errors are given as strings defined as macros. We can just |
---|
121 | look them up in a table to turn them into POSIX-style error codes. */ |
---|
122 | |
---|
123 | static int |
---|
124 | pcre_posix_error_code(const char *s) |
---|
125 | { |
---|
126 | size_t i; |
---|
127 | for (i = 0; i < sizeof(estring)/sizeof(char *); i++) |
---|
128 | if (strcmp(s, estring[i]) == 0) return eint[i]; |
---|
129 | return REG_ASSERT; |
---|
130 | } |
---|
131 | |
---|
132 | |
---|
133 | |
---|
134 | /************************************************* |
---|
135 | * Translate error code to string * |
---|
136 | *************************************************/ |
---|
137 | |
---|
138 | size_t |
---|
139 | regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) |
---|
140 | { |
---|
141 | const char *message, *addmessage; |
---|
142 | size_t length, addlength; |
---|
143 | |
---|
144 | message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))? |
---|
145 | "unknown error code" : pstring[errcode]; |
---|
146 | length = strlen(message) + 1; |
---|
147 | |
---|
148 | addmessage = " at offset "; |
---|
149 | addlength = (preg != NULL && (int)preg->re_erroffset != -1)? |
---|
150 | strlen(addmessage) + 6 : 0; |
---|
151 | |
---|
152 | if (errbuf_size > 0) |
---|
153 | { |
---|
154 | if (addlength > 0 && errbuf_size >= length + addlength) |
---|
155 | sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset); |
---|
156 | else |
---|
157 | { |
---|
158 | strncpy(errbuf, message, errbuf_size - 1); |
---|
159 | errbuf[errbuf_size-1] = 0; |
---|
160 | } |
---|
161 | } |
---|
162 | |
---|
163 | return length + addlength; |
---|
164 | } |
---|
165 | |
---|
166 | |
---|
167 | |
---|
168 | |
---|
169 | /************************************************* |
---|
170 | * Free store held by a regex * |
---|
171 | *************************************************/ |
---|
172 | |
---|
173 | void |
---|
174 | regfree(regex_t *preg) |
---|
175 | { |
---|
176 | (pcre_free)(preg->re_pcre); |
---|
177 | } |
---|
178 | |
---|
179 | |
---|
180 | |
---|
181 | |
---|
182 | /************************************************* |
---|
183 | * Compile a regular expression * |
---|
184 | *************************************************/ |
---|
185 | |
---|
186 | /* |
---|
187 | Arguments: |
---|
188 | preg points to a structure for recording the compiled expression |
---|
189 | pattern the pattern to compile |
---|
190 | cflags compilation flags |
---|
191 | |
---|
192 | Returns: 0 on success |
---|
193 | various non-zero codes on failure |
---|
194 | */ |
---|
195 | |
---|
196 | int |
---|
197 | regcomp(regex_t *preg, const char *pattern, int cflags) |
---|
198 | { |
---|
199 | const char *errorptr; |
---|
200 | int erroffset; |
---|
201 | int options = 0; |
---|
202 | |
---|
203 | if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; |
---|
204 | if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; |
---|
205 | |
---|
206 | preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL); |
---|
207 | preg->re_erroffset = erroffset; |
---|
208 | |
---|
209 | if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr); |
---|
210 | |
---|
211 | preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL); |
---|
212 | return 0; |
---|
213 | } |
---|
214 | |
---|
215 | |
---|
216 | |
---|
217 | |
---|
218 | /************************************************* |
---|
219 | * Match a regular expression * |
---|
220 | *************************************************/ |
---|
221 | |
---|
222 | /* Unfortunately, PCRE requires 3 ints of working space for each captured |
---|
223 | substring, so we have to get and release working store instead of just using |
---|
224 | the POSIX structures as was done in earlier releases when PCRE needed only 2 |
---|
225 | ints. */ |
---|
226 | |
---|
227 | int |
---|
228 | regexec(regex_t *preg, const char *string, size_t nmatch, |
---|
229 | regmatch_t pmatch[], int eflags) |
---|
230 | { |
---|
231 | int rc; |
---|
232 | int options = 0; |
---|
233 | int *ovector = NULL; |
---|
234 | |
---|
235 | if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; |
---|
236 | if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; |
---|
237 | |
---|
238 | preg->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ |
---|
239 | |
---|
240 | if (nmatch > 0) |
---|
241 | { |
---|
242 | ovector = (int *)malloc(sizeof(int) * nmatch * 3); |
---|
243 | if (ovector == NULL) return REG_ESPACE; |
---|
244 | } |
---|
245 | |
---|
246 | rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options, |
---|
247 | ovector, nmatch * 3); |
---|
248 | |
---|
249 | if (rc == 0) rc = nmatch; /* All captured slots were filled in */ |
---|
250 | |
---|
251 | if (rc >= 0) |
---|
252 | { |
---|
253 | size_t i; |
---|
254 | for (i = 0; i < rc; i++) |
---|
255 | { |
---|
256 | pmatch[i].rm_so = ovector[i*2]; |
---|
257 | pmatch[i].rm_eo = ovector[i*2+1]; |
---|
258 | } |
---|
259 | if (ovector != NULL) free(ovector); |
---|
260 | for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; |
---|
261 | return 0; |
---|
262 | } |
---|
263 | |
---|
264 | else |
---|
265 | { |
---|
266 | if (ovector != NULL) free(ovector); |
---|
267 | switch(rc) |
---|
268 | { |
---|
269 | case PCRE_ERROR_NOMATCH: return REG_NOMATCH; |
---|
270 | case PCRE_ERROR_NULL: return REG_INVARG; |
---|
271 | case PCRE_ERROR_BADOPTION: return REG_INVARG; |
---|
272 | case PCRE_ERROR_BADMAGIC: return REG_INVARG; |
---|
273 | case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT; |
---|
274 | case PCRE_ERROR_NOMEMORY: return REG_ESPACE; |
---|
275 | default: return REG_ASSERT; |
---|
276 | } |
---|
277 | } |
---|
278 | } |
---|
279 | |
---|
280 | /* End of pcreposix.c */ |
---|