source: trunk/third/pcre/pcreposix.c @ 19309

Revision 19309, 8.9 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r19308, which included commits to RCS files with non-trunk default branches.
Line 
1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/*
6This is a library of functions to support regular expressions whose syntax
7and semantics are as close as possible to those of the Perl 5 language. See
8the file Tech.Notes for some information on the internals.
9
10This module is a wrapper that provides a POSIX API to the underlying PCRE
11functions.
12
13Written by: Philip Hazel <ph10@cam.ac.uk>
14
15           Copyright (c) 1997-2001 University of Cambridge
16
17-----------------------------------------------------------------------------
18Permission is granted to anyone to use this software for any purpose on any
19computer system, and to redistribute it freely, subject to the following
20restrictions:
21
221. This software is distributed in the hope that it will be useful,
23   but WITHOUT ANY WARRANTY; without even the implied warranty of
24   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25
262. The origin of this software must not be misrepresented, either by
27   explicit claim or by omission.
28
293. Altered versions must be plainly marked as such, and must not be
30   misrepresented as being the original software.
31
324. If PCRE is embedded in any software that is released under the GNU
33   General Purpose Licence (GPL), then the terms of that licence shall
34   supersede any condition above with which it is incompatible.
35-----------------------------------------------------------------------------
36*/
37
#include "internal.h"
#include "pcreposix.h"
#include "stdlib.h"
#include <limits.h>
41
42
43
44/* Corresponding tables of PCRE error messages and POSIX error codes. */
45
46static const char *estring[] = {
47  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
48  ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49  ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50  ERR31 };
51
52static int eint[] = {
53  REG_EESCAPE, /* "\\ at end of pattern" */
54  REG_EESCAPE, /* "\\c at end of pattern" */
55  REG_EESCAPE, /* "unrecognized character follows \\" */
56  REG_BADBR,   /* "numbers out of order in {} quantifier" */
57  REG_BADBR,   /* "number too big in {} quantifier" */
58  REG_EBRACK,  /* "missing terminating ] for character class" */
59  REG_ECTYPE,  /* "invalid escape sequence in character class" */
60  REG_ERANGE,  /* "range out of order in character class" */
61  REG_BADRPT,  /* "nothing to repeat" */
62  REG_BADRPT,  /* "operand of unlimited repeat could match the empty string" */
63  REG_ASSERT,  /* "internal error: unexpected repeat" */
64  REG_BADPAT,  /* "unrecognized character after (?" */
65  REG_ASSERT,  /* "unused error" */
66  REG_EPAREN,  /* "missing )" */
67  REG_ESUBREG, /* "back reference to non-existent subpattern" */
68  REG_INVARG,  /* "erroffset passed as NULL" */
69  REG_INVARG,  /* "unknown option bit(s) set" */
70  REG_EPAREN,  /* "missing ) after comment" */
71  REG_ESIZE,   /* "parentheses nested too deeply" */
72  REG_ESIZE,   /* "regular expression too large" */
73  REG_ESPACE,  /* "failed to get memory" */
74  REG_EPAREN,  /* "unmatched brackets" */
75  REG_ASSERT,  /* "internal error: code overflow" */
76  REG_BADPAT,  /* "unrecognized character after (?<" */
77  REG_BADPAT,  /* "lookbehind assertion is not fixed length" */
78  REG_BADPAT,  /* "malformed number after (?(" */
79  REG_BADPAT,  /* "conditional group containe more than two branches" */
80  REG_BADPAT,  /* "assertion expected after (?(" */
81  REG_BADPAT,  /* "(?p must be followed by )" */
82  REG_ECTYPE,  /* "unknown POSIX class name" */
83  REG_BADPAT,  /* "POSIX collating elements are not supported" */
84  REG_INVARG,  /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
85  REG_BADPAT,  /* "characters with values > 255 are not yet supported in classes" */
86  REG_BADPAT,  /* "character value in \x{...} sequence is too large" */
87  REG_BADPAT   /* "invalid condition (?(0)" */
88};
89
90/* Table of texts corresponding to POSIX error codes */
91
/* Message texts indexed directly by POSIX error code: pstring[code] is the
text for that code, with slot 0 a placeholder for "no error". Both the strings
and the pointers are read-only. */

static const char * const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT   */
  "invalid repeat counts in {}",     /* REG_BADBR    */
  "pattern error",                   /* REG_BADPAT   */
  "? * + invalid",                   /* REG_BADRPT   */
  "unbalanced {}",                   /* REG_EBRACE   */
  "unbalanced []",                   /* REG_EBRACK   */
  "collation error - not relevant",  /* REG_ECOLLATE */
  "bad class",                       /* REG_ECTYPE   */
  "bad escape sequence",             /* REG_EESCAPE  */
  "empty expression",                /* REG_EMPTY    */
  "unbalanced ()",                   /* REG_EPAREN   */
  "bad range inside []",             /* REG_ERANGE   */
  "expression too big",              /* REG_ESIZE    */
  "failed to get memory",            /* REG_ESPACE   */
  "bad back reference",              /* REG_ESUBREG  */
  "bad argument",                    /* REG_INVARG   */
  "match failed"                     /* REG_NOMATCH  */
};
112
113
114
115
116/*************************************************
117*          Translate PCRE text code to int       *
118*************************************************/
119
120/* PCRE compile-time errors are given as strings defined as macros. We can just
121look them up in a table to turn them into POSIX-style error codes. */
122
123static int
124pcre_posix_error_code(const char *s)
125{
126size_t i;
127for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
128  if (strcmp(s, estring[i]) == 0) return eint[i];
129return REG_ASSERT;
130}
131
132
133
134/*************************************************
135*          Translate error code to string        *
136*************************************************/
137
138size_t
139regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
140{
141const char *message, *addmessage;
142size_t length, addlength;
143
144message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
145  "unknown error code" : pstring[errcode];
146length = strlen(message) + 1;
147
148addmessage = " at offset ";
149addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
150  strlen(addmessage) + 6 : 0;
151
152if (errbuf_size > 0)
153  {
154  if (addlength > 0 && errbuf_size >= length + addlength)
155    sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
156  else
157    {
158    strncpy(errbuf, message, errbuf_size - 1);
159    errbuf[errbuf_size-1] = 0;
160    }
161  }
162
163return length + addlength;
164}
165
166
167
168
169/*************************************************
170*           Free store held by a regex           *
171*************************************************/
172
173void
174regfree(regex_t *preg)
175{
176(pcre_free)(preg->re_pcre);
177}
178
179
180
181
182/*************************************************
183*            Compile a regular expression        *
184*************************************************/
185
186/*
187Arguments:
188  preg        points to a structure for recording the compiled expression
189  pattern     the pattern to compile
190  cflags      compilation flags
191
192Returns:      0 on success
193              various non-zero codes on failure
194*/
195
196int
197regcomp(regex_t *preg, const char *pattern, int cflags)
198{
199const char *errorptr;
200int erroffset;
201int options = 0;
202
203if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
204if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
205
206preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
207preg->re_erroffset = erroffset;
208
209if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
210
211preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL);
212return 0;
213}
214
215
216
217
218/*************************************************
219*              Match a regular expression        *
220*************************************************/
221
222/* Unfortunately, PCRE requires 3 ints of working space for each captured
223substring, so we have to get and release working store instead of just using
224the POSIX structures as was done in earlier releases when PCRE needed only 2
225ints. */
226
227int
228regexec(regex_t *preg, const char *string, size_t nmatch,
229  regmatch_t pmatch[], int eflags)
230{
231int rc;
232int options = 0;
233int *ovector = NULL;
234
235if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
236if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
237
238preg->re_erroffset = (size_t)(-1);   /* Only has meaning after compile */
239
240if (nmatch > 0)
241  {
242  ovector = (int *)malloc(sizeof(int) * nmatch * 3);
243  if (ovector == NULL) return REG_ESPACE;
244  }
245
246rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
247  ovector, nmatch * 3);
248
249if (rc == 0) rc = nmatch;    /* All captured slots were filled in */
250
251if (rc >= 0)
252  {
253  size_t i;
254  for (i = 0; i < rc; i++)
255    {
256    pmatch[i].rm_so = ovector[i*2];
257    pmatch[i].rm_eo = ovector[i*2+1];
258    }
259  if (ovector != NULL) free(ovector);
260  for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
261  return 0;
262  }
263
264else
265  {
266  if (ovector != NULL) free(ovector);
267  switch(rc)
268    {
269    case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
270    case PCRE_ERROR_NULL: return REG_INVARG;
271    case PCRE_ERROR_BADOPTION: return REG_INVARG;
272    case PCRE_ERROR_BADMAGIC: return REG_INVARG;
273    case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
274    case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
275    default: return REG_ASSERT;
276    }
277  }
278}
279
280/* End of pcreposix.c */
Note: See TracBrowser for help on using the repository browser.