source: trunk/third/rx/rx/rxposix.c @ 10430

Revision 10430, 11.0 KB checked in by ghudson, 27 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r10429, which included commits to RCS files with non-trunk default branches.
Line 
1/*      Copyright (C) 1995, 1996 Tom Lord
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 * GNU Library General Public License for more details.
12 *
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING.  If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19
20
21#include "rxall.h"
22#include "rxposix.h"
23#include "rxgnucomp.h"
24#include "rxbasic.h"
25#include "rxsimp.h"
26
27/* regcomp takes a regular expression as a string and compiles it.
28 *
29 * PATTERN is the address of the pattern string.
30 *
31 * CFLAGS is a series of bits which affect compilation.
32 *
33 *   If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
34 *   use POSIX basic syntax.
35 *
36 *   If REG_NEWLINE is set, then . and [^...] don't match newline.
37 *   Also, regexec will try a match beginning after every newline.
38 *
39 *   If REG_ICASE is set, then we considers upper- and lowercase
40 *   versions of letters to be equivalent when matching.
41 *
42 *   If REG_NOSUB is set, then when PREG is passed to regexec, that
43 *   routine will report only success or failure, and nothing about the
44 *   registers.
45 *
46 * It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
47 * the return codes and their meanings.) 
48 */
49
50
51#ifdef __STDC__
52int
53regncomp (regex_t * preg, const char * pattern, int len, int cflags)
54#else
55int
56regncomp (preg, pattern, len, cflags)
57     regex_t * preg;
58     const char * pattern;
59     int len;
60     int cflags;
61#endif
62{
63  int ret;
64  unsigned int syntax;
65
66  rx_bzero ((char *)preg, sizeof (*preg));
67  syntax = ((cflags & REG_EXTENDED)
68            ? RE_SYNTAX_POSIX_EXTENDED
69            : RE_SYNTAX_POSIX_BASIC);
70
71  if (!(cflags & REG_ICASE))
72    preg->translate = 0;
73  else
74    {
75      unsigned i;
76
77      preg->translate = (unsigned char *) malloc (256);
78      if (!preg->translate)
79        return (int) REG_ESPACE;
80
81      /* Map uppercase characters to corresponding lowercase ones.  */
82      for (i = 0; i < CHAR_SET_SIZE; i++)
83        preg->translate[i] = isupper (i) ? tolower (i) : i;
84    }
85
86
87  /* If REG_NEWLINE is set, newlines are treated differently.  */
88  if (!(cflags & REG_NEWLINE))
89    preg->newline_anchor = 0;
90  else
91    {
92      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
93      syntax &= ~RE_DOT_NEWLINE;
94      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
95      /* It also changes the matching behavior.  */
96      preg->newline_anchor = 1;
97    }
98
99  preg->no_sub = !!(cflags & REG_NOSUB);
100
101  ret = rx_parse (&preg->pattern,
102                  pattern, len,
103                  syntax,
104                  256,
105                  preg->translate);
106
107  /* POSIX doesn't distinguish between an unmatched open-group and an
108   * unmatched close-group: both are REG_EPAREN.
109   */
110  if (ret == REG_ERPAREN)
111    ret = REG_EPAREN;
112
113  if (!ret)
114    {
115      preg->re_nsub = 1;
116      preg->subexps = 0;
117      rx_posix_analyze_rexp (&preg->subexps,
118                             &preg->re_nsub,
119                             preg->pattern,
120                             0);
121      preg->is_nullable = rx_fill_in_fastmap (256,
122                                              preg->fastmap,
123                                              preg->pattern);
124
125      preg->is_anchored = rx_is_anchored_p (preg->pattern);
126    }
127
128  return (int) ret;
129}
130
131
132#ifdef __STDC__
133int
134regcomp (regex_t * preg, const char * pattern, int cflags)
135#else
136int
137regcomp (preg, pattern, cflags)
138     regex_t * preg;
139     const char * pattern;
140     int cflags;
141#endif
142{
143  /* POSIX says a null character in the pattern terminates it, so we
144   * can use strlen here in compiling the pattern. 
145   */
146
147  return regncomp (preg, pattern, strlen (pattern), cflags);
148}
149
150
151
152
153/* Returns a message corresponding to an error code, ERRCODE, returned
154   from either regcomp or regexec.   */
155
156#ifdef __STDC__
157size_t
158regerror (int errcode, const regex_t *preg,
159          char *errbuf, size_t errbuf_size)
160#else
161size_t
162regerror (errcode, preg, errbuf, errbuf_size)
163    int errcode;
164    const regex_t *preg;
165    char *errbuf;
166    size_t errbuf_size;
167#endif
168{
169  const char *msg;
170  size_t msg_size;
171
172  msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
173  msg_size = strlen (msg) + 1; /* Includes the 0.  */
174  if (errbuf_size != 0)
175    {
176      if (msg_size > errbuf_size)
177        {
178          strncpy (errbuf, msg, errbuf_size - 1);
179          errbuf[errbuf_size - 1] = 0;
180        }
181      else
182        strcpy (errbuf, msg);
183    }
184  return msg_size;
185}
186
187
188
189#ifdef __STDC__
190int
191rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
192#else
193int
194rx_regmatch (pmatch, preg, rules, start, end, string)
195     regmatch_t pmatch[];
196     const regex_t *preg;
197     struct rx_context_rules * rules;
198     int start;
199     int end;
200     const char *string;
201#endif
202{
203  struct rx_solutions * solutions;
204  enum rx_answers answer;
205  struct rx_context_rules local_rules;
206  int orig_end;
207  int end_lower_bound;
208  int end_upper_bound;
209 
210  local_rules = *rules;
211  orig_end = end;
212
213  if (!preg->pattern)
214    {
215      end_lower_bound = start;
216      end_upper_bound = start;
217    }
218  else if (preg->pattern->len >= 0)
219    {
220      end_lower_bound = start + preg->pattern->len;
221      end_upper_bound = start + preg->pattern->len;
222    }
223  else
224    {
225      end_lower_bound = start;
226      end_upper_bound = end;
227    }
228  end = end_upper_bound;
229  while (end >= end_lower_bound)
230    {
231      local_rules.not_eol = (rules->not_eol
232                             ? (   (end == orig_end)
233                                || !local_rules.newline_anchor
234                                || (string[end] != '\n'))
235                             : (   (end != orig_end)
236                                && (!local_rules.newline_anchor
237                                    || (string[end] != '\n'))));
238      solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
239                                           start, end, &local_rules, string);
240      if (!solutions)
241        return REG_ESPACE;
242     
243      answer = rx_next_solution (solutions);
244
245      if (answer == rx_yes)
246        {
247          if (pmatch)
248            {
249              pmatch[0].rm_so = start;
250              pmatch[0].rm_eo = end;
251              pmatch[0].final_tag = solutions->final_tag;
252            }
253          rx_basic_free_solutions (solutions);
254          return 0;
255        }
256      else
257        rx_basic_free_solutions (solutions);
258
259      --end;
260    }
261
262  switch (answer)
263    {
264    default:
265    case rx_bogus:
266      return REG_ESPACE;
267
268    case rx_no:
269      return REG_NOMATCH;
270    }
271}
272
273
274#ifdef __STDC__
275int
276rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
277#else
278int
279rx_regexec (pmatch, preg, rules, start, end, string)
280     regmatch_t pmatch[];
281     const regex_t *preg;
282     struct rx_context_rules * rules;
283     int start;
284     int end;
285     const char *string;
286#endif
287{
288  int x;
289  int stat;
290  int anchored;
291  struct rexp_node * simplified;
292  struct rx_unfa * unfa;
293  struct rx_classical_system machine;
294
295  anchored = preg->is_anchored;
296
297  unfa = 0;
298  if ((end - start) > RX_MANY_CASES)
299    {
300      if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
301        return REG_ESPACE;
302      unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
303      if (!unfa)
304        {
305          rx_free_rexp (simplified);
306          return REG_ESPACE;
307        }
308      rx_init_system (&machine, unfa->nfa);
309      rx_free_rexp (simplified);
310    }
311
312  for (x = start; x <= end; ++x)
313    {
314      if (preg->is_nullable
315          || ((x < end)
316              && (preg->fastmap[((unsigned char *)string)[x]])))
317        {
318          if ((end - start) > RX_MANY_CASES)
319            {
320              int amt;
321              if (rx_start_superstate (&machine) != rx_yes)
322                {
323                  rx_free_unfa (unfa);
324                  return REG_ESPACE;
325                }
326              amt = rx_advance_to_final (&machine, string + x, end - start - x);
327              if (!machine.final_tag && (amt < (end - start - x)))
328                goto nomatch;
329            }
330          stat = rx_regmatch (pmatch, preg, rules, x, end, string);
331          if (!stat || (stat != REG_NOMATCH))
332            {
333              rx_free_unfa (unfa);
334              return stat;
335            }
336        }
337    nomatch:
338      if (anchored)
339        if (!preg->newline_anchor)
340          {
341            rx_free_unfa (unfa);
342            return REG_NOMATCH;
343          }
344        else
345          while (x < end)
346            if (string[x] == '\n')
347              break;
348            else
349              ++x;
350    }
351  rx_free_unfa (unfa);
352  return REG_NOMATCH;
353}
354
355
356
357/* regexec searches for a given pattern, specified by PREG, in the
358 * string STRING.
359 *
360 * If NMATCH is zero or REG_NOSUB was set in the cflags argument to
361 * `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
362 * least NMATCH elements, and we set them to the offsets of the
363 * corresponding matched substrings.
364 *
365 * EFLAGS specifies `execution flags' which affect matching: if
366 * REG_NOTBOL is set, then ^ does not match at the beginning of the
367 * string; if REG_NOTEOL is set, then $ does not match at the end.
368 *
369 * We return 0 if we find a match and REG_NOMATCH if not. 
370 */
371
372#ifdef __STDC__
373int
374regnexec (const regex_t *preg, const char *string, int len, size_t nmatch, regmatch_t **pmatch, int eflags)
375#else
376int
377regnexec (preg, string, len, nmatch, pmatch, eflags)
378     const regex_t *preg;
379     const char *string;
380     int len;
381     size_t nmatch;
382     regmatch_t **pmatch;
383     int eflags;
384#endif
385{
386  int want_reg_info;
387  struct rx_context_rules rules;
388  regmatch_t * regs;
389  size_t nregs;
390  int stat;
391
392  want_reg_info = (!preg->no_sub && (nmatch > 0));
393
394  rules.newline_anchor = preg->newline_anchor;
395  rules.not_bol = !!(eflags & REG_NOTBOL);
396  rules.not_eol = !!(eflags & REG_NOTEOL);
397  rules.case_indep = !!(eflags & REG_ICASE);
398
399  if (nmatch >= preg->re_nsub)
400    {
401      regs = *pmatch;
402      nregs = nmatch;
403    }
404  else
405    {
406      regs = (regmatch_t *)malloc (preg->re_nsub * sizeof (*regs));
407      if (!regs)
408        return REG_ESPACE;
409      nregs = preg->re_nsub;
410    }
411
412  {
413    int x;
414    for (x = 0; x < nregs; ++x)
415      regs[x].rm_so = regs[x].rm_eo = -1;
416  }
417
418
419  stat = rx_regexec (regs, preg, &rules, 0, len, string);
420
421  if (!stat && want_reg_info && pmatch && (regs != *pmatch))
422    {
423      size_t x;
424      for (x = 0; x < nmatch; ++x)
425        (*pmatch)[x] = regs[x];
426    }
427
428  if (!stat && (eflags & REG_ALLOC_REGS))
429    *pmatch = regs;
430  else if (regs && (!pmatch || (regs != *pmatch)))
431    free (regs);
432 
433  return stat;
434}
435
436#ifdef __STDC__
437int
438regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
439#else
440int
441regexec (preg, string, nmatch, pmatch, eflags)
442     const regex_t *preg;
443     const char *string;
444     size_t nmatch;
445     regmatch_t pmatch[];
446     int eflags;
447#endif
448{
449  return regnexec (preg,
450                   string,
451                   strlen (string),
452                   nmatch,
453                   &pmatch,
454                   (eflags & ~REG_ALLOC_REGS));
455}
456
457
458/* Free dynamically allocated space used by PREG.  */
459
460#ifdef __STDC__
461void
462regfree (regex_t *preg)
463#else
464void
465regfree (preg)
466    regex_t *preg;
467#endif
468{
469  if (preg->pattern)
470    {
471      rx_free_rexp (preg->pattern);
472      preg->pattern = 0;
473    }
474  if (preg->subexps)
475    {
476      free (preg->subexps);
477      preg->subexps = 0;
478    }
479  if (preg->translate != 0)
480    {
481      free (preg->translate);
482      preg->translate = 0;
483    }
484}
Note: See TracBrowser for help on using the repository browser.