source: trunk/third/sed/lib/regex.c @ 17271

Revision 17271, 183.1 KB checked in by ghudson, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r17270, which included commits to RCS files with non-trunk default branches.
Line 
1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P1003.2/D11.2, except for some of the
4   internationalization features.)
5   Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
6
7   NOTE: The canonical source of this file is maintained with the GNU C Library.
8   Bugs can be reported to bug-glibc@gnu.org.
9
10   This program is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by the
12   Free Software Foundation; either version 2, or (at your option) any
13   later version.
14
15   This program is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with this program; if not, write to the Free Software
22   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
23   USA.  */
24
25/* AIX requires this to be the first thing in the file. */
26#if defined _AIX && !defined REGEX_MALLOC
27  #pragma alloca
28#endif
29
30#undef  _GNU_SOURCE
31#define _GNU_SOURCE
32
33#ifdef HAVE_CONFIG_H
34# include <config.h>
35#endif
36
37#ifndef PARAMS
38# if defined __GNUC__ || (defined __STDC__ && __STDC__)
39#  define PARAMS(args) args
40# else
41#  define PARAMS(args) ()
42# endif  /* GCC.  */
43#endif  /* Not PARAMS.  */
44
45#if defined STDC_HEADERS && !defined emacs
46# include <stddef.h>
47#else
48/* We need this for `regex-gnu.h', and perhaps for the Emacs include files.  */
49# include <sys/types.h>
50#endif
51
52/* For platform which support the ISO C amendement 1 functionality we
53   support user defined character classes.  */
54#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
55# include <wctype.h>
56# include <wchar.h>
57
58/* We have to keep the namespace clean.  */
59# define regfree(preg) __regfree (preg)
60# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
61# define regnexec(pr, st, ln, nm, pm, ef) __regnexec (pr, st, ln, nm, pm, ef)
62# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
63# define regerror(errcode, preg, errbuf, errbuf_size) \
64        __regerror(errcode, preg, errbuf, errbuf_size)
65# define re_set_registers(bu, re, nu, st, en) \
66        __re_set_registers (bu, re, nu, st, en)
67# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
68        __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
69# define re_match(bufp, string, size, pos, regs) \
70        __re_match (bufp, string, size, pos, regs)
71# define re_search(bufp, string, size, startpos, range, regs) \
72        __re_search (bufp, string, size, startpos, range, regs)
73# define re_compile_pattern(pattern, length, bufp) \
74        __re_compile_pattern (pattern, length, bufp)
75# define re_set_syntax(syntax) __re_set_syntax (syntax)
76# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
77        __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
78# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
79
80#define btowc __btowc
81#endif
82
83/* This is for other GNU distributions with internationalized messages.  */
84#if HAVE_LIBINTL_H || defined _LIBC
85# include <libintl.h>
86#else
87# define gettext(msgid) (msgid)
88#endif
89
90#ifndef gettext_noop
91/* This define is so xgettext can find the internationalizable
92   strings.  */
93# define gettext_noop(String) String
94#endif
95
96/* The `emacs' switch turns on certain matching commands
97   that make sense only in Emacs. */
98#ifdef emacs
99
100# include "lisp.h"
101# include "buffer.h"
102# include "syntax.h"
103
104#else  /* not emacs */
105
106/* If we are not linking with Emacs proper,
107   we can't use the relocating allocator
108   even if config.h says that we can.  */
109# undef REL_ALLOC
110
111# if defined STDC_HEADERS || defined _LIBC
112#  include <stdlib.h>
113# else
114char *malloc ();
115char *realloc ();
116# endif
117
118/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
119   If nothing else has been done, use the method below.  */
120# ifdef INHIBIT_STRING_HEADER
121#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
122#   if !defined bzero && !defined bcopy
123#    undef INHIBIT_STRING_HEADER
124#   endif
125#  endif
126# endif
127
128/* This is the normal way of making sure we have a bcopy and a bzero.
129   This is used in most programs--a few other programs avoid this
130   by defining INHIBIT_STRING_HEADER.  */
131# ifndef INHIBIT_STRING_HEADER
132#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
133#   include <string.h>
134#   ifndef bzero
135#    ifndef _LIBC
136#     define bzero(s, n)        (memset (s, '\0', n), (s))
137#    else
138#     define bzero(s, n)        __bzero (s, n)
139#    endif
140#   endif
141#  else
142#   include <strings.h>
143#   ifndef memcmp
144#    define memcmp(s1, s2, n)   bcmp (s1, s2, n)
145#   endif
146#   ifndef memcpy
147#    define memcpy(d, s, n)     (bcopy (s, d, n), (d))
148#   endif
149#  endif
150# endif
151
152/* Define the syntax stuff for \<, \>, etc.  */
153
154/* This must be nonzero for the wordchar and notwordchar pattern
155   commands in re_match_2.  */
156# ifndef Sword
157#  define Sword 1
158# endif
159
160# ifdef SWITCH_ENUM_BUG
161#  define SWITCH_ENUM_CAST(x) ((int)(x))
162# else
163#  define SWITCH_ENUM_CAST(x) (x)
164# endif
165
166/* How many characters in the character set.  */
167# define CHAR_SET_SIZE 256
168
169# ifdef SYNTAX_TABLE
170
171extern char *re_syntax_table;
172
173# else /* not SYNTAX_TABLE */
174
175static char re_syntax_table[CHAR_SET_SIZE];
176
177static void
178init_syntax_once ()
179{
180   register int c;
181   static int done = 0;
182
183   if (done)
184     return;
185
186   bzero (re_syntax_table, sizeof re_syntax_table);
187
188   for (c = 'a'; c <= 'z'; c++)
189     re_syntax_table[c] = Sword;
190
191   for (c = 'A'; c <= 'Z'; c++)
192     re_syntax_table[c] = Sword;
193
194   for (c = '0'; c <= '9'; c++)
195     re_syntax_table[c] = Sword;
196
197   re_syntax_table['_'] = Sword;
198
199   done = 1;
200}
201
202# endif /* not SYNTAX_TABLE */
203
204# define SYNTAX(c) re_syntax_table[c]
205
206#endif /* not emacs */
207
208/* Get the interface, including the syntax bits.  */
209#include "regex-gnu.h"
210
211/* isalpha etc. are used for the character classes.  */
212#include <ctype.h>
213
214/* Jim Meyering writes:
215
216   "... Some ctype macros are valid only for character codes that
217   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
218   using /bin/cc or gcc but without giving an ansi option).  So, all
219   ctype uses should be through macros like ISPRINT...  If
220   STDC_HEADERS is defined, then autoconf has verified that the ctype
221   macros don't need to be guarded with references to isascii. ...
222   Defining isascii to 1 should let any compiler worth its salt
223   eliminate the && through constant folding."
224   Solaris defines some of these symbols so we must undefine them first.  */
225
226#undef ISASCII
227#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
228# define ISASCII(c) 1
229#else
230# define ISASCII(c) isascii(c)
231#endif
232
233#ifdef isblank
234# define ISBLANK(c) (ISASCII (c) && isblank (c))
235#else
236# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
237#endif
238#ifdef isgraph
239# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
240#else
241# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
242#endif
243
244#undef ISPRINT
245#define ISPRINT(c) (ISASCII (c) && isprint (c))
246#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
247#define ISALNUM(c) (ISASCII (c) && isalnum (c))
248#define ISALPHA(c) (ISASCII (c) && isalpha (c))
249#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
250#define ISLOWER(c) (ISASCII (c) && islower (c))
251#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
252#define ISSPACE(c) (ISASCII (c) && isspace (c))
253#define ISUPPER(c) (ISASCII (c) && isupper (c))
254#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
255
256#ifndef NULL
257# define NULL (void *)0
258#endif
259
260/* We remove any previous definition of `SIGN_EXTEND_CHAR',
261   since ours (we hope) works properly with all combinations of
262   machines, compilers, `char' and `unsigned char' argument types.
263   (Per Bothner suggested the basic approach.)  */
264#undef SIGN_EXTEND_CHAR
265#if __STDC__
266# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
267#else  /* not __STDC__ */
268/* As in Harbison and Steele.  */
269# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
270#endif
271
272/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
273   use `alloca' instead of `malloc'.  This is because using malloc in
274   re_search* or re_match* could cause memory leaks when C-g is used in
275   Emacs; also, malloc is slower and causes storage fragmentation.  On
276   the other hand, malloc is more portable, and easier to debug.
277
278   Because we sometimes use alloca, some routines have to be macros,
279   not functions -- `alloca'-allocated space disappears at the end of the
280   function it is called in.  */
281
282#ifdef REGEX_MALLOC
283
284# define REGEX_ALLOCATE malloc
285# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
286# define REGEX_FREE free
287
288#else /* not REGEX_MALLOC  */
289
290/* Emacs already defines alloca, sometimes.  */
291# ifndef alloca
292
293/* Make alloca work the best possible way.  */
294#  ifdef __GNUC__
295#   define alloca __builtin_alloca
296#  else /* not __GNUC__ */
297#   if HAVE_ALLOCA_H
298#    include <alloca.h>
299#   endif /* HAVE_ALLOCA_H */
300#  endif /* not __GNUC__ */
301
302# endif /* not alloca */
303
304# define REGEX_ALLOCATE alloca
305
306/* Assumes a `char *destination' variable.  */
307# define REGEX_REALLOCATE(source, osize, nsize)                         \
308  (destination = (char *) alloca (nsize),                               \
309   memcpy (destination, source, osize))
310
311/* No need to do anything to free, after alloca.  */
312# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
313
314#endif /* not REGEX_MALLOC */
315
316/* Define how to allocate the failure stack.  */
317
318#if defined REL_ALLOC && defined REGEX_MALLOC
319
320# define REGEX_ALLOCATE_STACK(size)                             \
321  r_alloc (&failure_stack_ptr, (size))
322# define REGEX_REALLOCATE_STACK(source, osize, nsize)           \
323  r_re_alloc (&failure_stack_ptr, (nsize))
324# define REGEX_FREE_STACK(ptr)                                  \
325  r_alloc_free (&failure_stack_ptr)
326
327#else /* not using relocating allocator */
328
329# ifdef REGEX_MALLOC
330
331#  define REGEX_ALLOCATE_STACK malloc
332#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
333#  define REGEX_FREE_STACK free
334
335# else /* not REGEX_MALLOC */
336
337#  define REGEX_ALLOCATE_STACK alloca
338
339#  define REGEX_REALLOCATE_STACK(source, osize, nsize)                  \
340   REGEX_REALLOCATE (source, osize, nsize)
341/* No need to explicitly free anything.  */
342#  define REGEX_FREE_STACK(arg)
343
344# endif /* not REGEX_MALLOC */
345#endif /* not using relocating allocator */
346
347
348/* True if `size1' is non-NULL and PTR is pointing anywhere inside
349   `string1' or just past its end.  This works if PTR is NULL, which is
350   a good thing.  */
351#define FIRST_STRING_P(ptr)                                     \
352  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
353
354/* (Re)Allocate N items of type T using malloc, or fail.  */
355#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
356#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
357#define RETALLOC_IF(addr, n, t) \
358  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
359#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
360
361#define BYTEWIDTH 8 /* In bits.  */
362
363#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
364
365#undef MAX
366#undef MIN
367#define MAX(a, b) ((a) > (b) ? (a) : (b))
368#define MIN(a, b) ((a) < (b) ? (a) : (b))
369
370typedef char boolean;
371#define false 0
372#define true 1
373
374static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
375                                        const char *string1, int size1,
376                                        const char *string2, int size2,
377                                        int pos,
378                                        struct re_registers *regs,
379                                        int stop));
380
381/* These are the command codes that appear in compiled regular
382   expressions.  Some opcodes are followed by argument bytes.  A
383   command code can specify any interpretation whatsoever for its
384   arguments.  Zero bytes may appear in the compiled regular expression.  */
385
386typedef enum
387{
388  no_op = 0,
389
390  /* Succeed right away--no more backtracking.  */
391  succeed,
392
393        /* Followed by one byte giving n, then by n literal bytes.  */
394  exactn,
395
396        /* Matches any (more or less) character.  */
397  anychar,
398
399        /* Matches any one char belonging to specified set.  First
400           following byte is number of bitmap bytes.  Then come bytes
401           for a bitmap saying which chars are in.  Bits in each byte
402           are ordered low-bit-first.  A character is in the set if its
403           bit is 1.  A character too large to have a bit in the map is
404           automatically not in the set.  */
405  charset,
406
407        /* Same parameters as charset, but match any character that is
408           not one of those specified.  */
409  charset_not,
410
411        /* Start remembering the text that is matched, for storing in a
412           register.  Followed by one byte with the register number, in
413           the range 0 to one less than the pattern buffer's re_nsub
414           field.  Then followed by one byte with the number of groups
415           inner to this one.  (This last has to be part of the
416           start_memory only because we need it in the on_failure_jump
417           of re_match_2.)  */
418  start_memory,
419
420        /* Stop remembering the text that is matched and store it in a
421           memory register.  Followed by one byte with the register
422           number, in the range 0 to one less than `re_nsub' in the
423           pattern buffer, and one byte with the number of inner groups,
424           just like `start_memory'.  (We need the number of inner
425           groups here because we don't have any easy way of finding the
426           corresponding start_memory when we're at a stop_memory.)  */
427  stop_memory,
428
429        /* Match a duplicate of something remembered. Followed by one
430           byte containing the register number.  */
431  duplicate,
432
433        /* Fail unless at beginning of line.  */
434  begline,
435
436        /* Fail unless at end of line.  */
437  endline,
438
439        /* Succeeds if at beginning of buffer (if emacs) or at beginning
440           of string to be matched (if not).  */
441  begbuf,
442
443        /* Analogously, for end of buffer/string.  */
444  endbuf,
445
446        /* Followed by two byte relative address to which to jump.  */
447  jump,
448
449        /* Same as jump, but marks the end of an alternative.  */
450  jump_past_alt,
451
452        /* Followed by two-byte relative address of place to resume at
453           in case of failure.  */
454  on_failure_jump,
455
456        /* Like on_failure_jump, but pushes a placeholder instead of the
457           current string position when executed.  */
458  on_failure_keep_string_jump,
459
460        /* Throw away latest failure point and then jump to following
461           two-byte relative address.  */
462  pop_failure_jump,
463
464        /* Change to pop_failure_jump if know won't have to backtrack to
465           match; otherwise change to jump.  This is used to jump
466           back to the beginning of a repeat.  If what follows this jump
467           clearly won't match what the repeat does, such that we can be
468           sure that there is no use backtracking out of repetitions
469           already matched, then we change it to a pop_failure_jump.
470           Followed by two-byte address.  */
471  maybe_pop_jump,
472
473        /* Jump to following two-byte address, and push a dummy failure
474           point. This failure point will be thrown away if an attempt
475           is made to use it for a failure.  A `+' construct makes this
476           before the first repeat.  Also used as an intermediary kind
477           of jump when compiling an alternative.  */
478  dummy_failure_jump,
479
480        /* Push a dummy failure point and continue.  Used at the end of
481           alternatives.  */
482  push_dummy_failure,
483
484        /* Followed by two-byte relative address and two-byte number n.
485           After matching N times, jump to the address upon failure.  */
486  succeed_n,
487
488        /* Followed by two-byte relative address, and two-byte number n.
489           Jump to the address N times, then fail.  */
490  jump_n,
491
492        /* Set the following two-byte relative address to the
493           subsequent two-byte number.  The address *includes* the two
494           bytes of number.  */
495  set_number_at,
496
497  wordchar,     /* Matches any word-constituent character.  */
498  notwordchar,  /* Matches any char that is not a word-constituent.  */
499
500  wordbeg,      /* Succeeds if at word beginning.  */
501  wordend,      /* Succeeds if at word end.  */
502
503  wordbound,    /* Succeeds if at a word boundary.  */
504  notwordbound  /* Succeeds if not at a word boundary.  */
505
506#ifdef emacs
507  ,before_dot,  /* Succeeds if before point.  */
508  at_dot,       /* Succeeds if at point.  */
509  after_dot,    /* Succeeds if after point.  */
510
511        /* Matches any character whose syntax is specified.  Followed by
512           a byte which contains a syntax code, e.g., Sword.  */
513  syntaxspec,
514
515        /* Matches any character whose syntax is not that specified.  */
516  notsyntaxspec
517#endif /* emacs */
518} re_opcode_t;
519
520/* Common operations on the compiled pattern.  */
521
522/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
523
524#define STORE_NUMBER(destination, number)                               \
525  do {                                                                  \
526    (destination)[0] = (number) & 0377;                                 \
527    (destination)[1] = (number) >> 8;                                   \
528  } while (0)
529
530/* Same as STORE_NUMBER, except increment DESTINATION to
531   the byte after where the number is stored.  Therefore, DESTINATION
532   must be an lvalue.  */
533
534#define STORE_NUMBER_AND_INCR(destination, number)                      \
535  do {                                                                  \
536    STORE_NUMBER (destination, number);                                 \
537    (destination) += 2;                                                 \
538  } while (0)
539
540/* Put into DESTINATION a number stored in two contiguous bytes starting
541   at SOURCE.  */
542
543#define EXTRACT_NUMBER(destination, source)                             \
544  do {                                                                  \
545    (destination) = *(source) & 0377;                                   \
546    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
547  } while (0)
548
549#ifdef DEBUG
550static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
551static void
552extract_number (dest, source)
553    int *dest;
554    unsigned char *source;
555{
556  int temp = SIGN_EXTEND_CHAR (*(source + 1));
557  *dest = *source & 0377;
558  *dest += temp << 8;
559}
560
561# ifndef EXTRACT_MACROS /* To debug the macros.  */
562#  undef EXTRACT_NUMBER
563#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
564# endif /* not EXTRACT_MACROS */
565
566#endif /* DEBUG */
567
568/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
569   SOURCE must be an lvalue.  */
570
571#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
572  do {                                                                  \
573    EXTRACT_NUMBER (destination, source);                               \
574    (source) += 2;                                                      \
575  } while (0)
576
577#ifdef DEBUG
578static void extract_number_and_incr _RE_ARGS ((int *destination,
579                                               unsigned char **source));
580static void
581extract_number_and_incr (destination, source)
582    int *destination;
583    unsigned char **source;
584{
585  extract_number (destination, *source);
586  *source += 2;
587}
588
589# ifndef EXTRACT_MACROS
590#  undef EXTRACT_NUMBER_AND_INCR
591#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
592  extract_number_and_incr (&dest, &src)
593# endif /* not EXTRACT_MACROS */
594
595#endif /* DEBUG */
596
597/* If DEBUG is defined, Regex prints many voluminous messages about what
598   it is doing (if the variable `debug' is nonzero).  If linked with the
599   main program in `iregex.c', you can enter patterns and strings
600   interactively.  And if linked with the main program in `main.c' and
601   the other test files, you can run the already-written tests.  */
602
603#ifdef DEBUG
604
605/* We use standard I/O for debugging.  */
606# include <stdio.h>
607
608/* It is useful to test things that ``must'' be true when debugging.  */
609# include <assert.h>
610
611static int debug = 0;
612
613# define DEBUG_STATEMENT(e) e
614# define DEBUG_PRINT1(x) if (debug) printf (x)
615# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
616# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
617# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
618# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
619  if (debug) print_partial_compiled_pattern (s, e)
620# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
621  if (debug) print_double_string (w, s1, sz1, s2, sz2)
622
623
624/* Print the fastmap in human-readable form.  */
625
626void
627print_fastmap (fastmap)
628    char *fastmap;
629{
630  unsigned was_a_range = 0;
631  unsigned i = 0;
632
633  while (i < (1 << BYTEWIDTH))
634    {
635      if (fastmap[i++])
636        {
637          was_a_range = 0;
638          putchar (i - 1);
639          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
640            {
641              was_a_range = 1;
642              i++;
643            }
644          if (was_a_range)
645            {
646              printf ("-");
647              putchar (i - 1);
648            }
649        }
650    }
651  putchar ('\n');
652}
653
654
655/* Print a compiled pattern string in human-readable form, starting at
656   the START pointer into it and ending just before the pointer END.  */
657
658void
659print_partial_compiled_pattern (start, end)
660    unsigned char *start;
661    unsigned char *end;
662{
663  int mcnt, mcnt2;
664  unsigned char *p1;
665  unsigned char *p = start;
666  unsigned char *pend = end;
667
668  if (start == NULL)
669    {
670      printf ("(null)\n");
671      return;
672    }
673
674  /* Loop over pattern commands.  */
675  while (p < pend)
676    {
677      printf ("%d:\t", p - start);
678
679      switch ((re_opcode_t) *p++)
680        {
681        case no_op:
682          printf ("/no_op");
683          break;
684
685        case exactn:
686          mcnt = *p++;
687          printf ("/exactn/%d", mcnt);
688          do
689            {
690              putchar ('/');
691              putchar (*p++);
692            }
693          while (--mcnt);
694          break;
695
696        case start_memory:
697          mcnt = *p++;
698          printf ("/start_memory/%d/%d", mcnt, *p++);
699          break;
700
701        case stop_memory:
702          mcnt = *p++;
703          printf ("/stop_memory/%d/%d", mcnt, *p++);
704          break;
705
706        case duplicate:
707          printf ("/duplicate/%d", *p++);
708          break;
709
710        case anychar:
711          printf ("/anychar");
712          break;
713
714        case charset:
715        case charset_not:
716          {
717            register int c, last = -100;
718            register int in_range = 0;
719
720            printf ("/charset [%s",
721                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
722
723            assert (p + *p < pend);
724
725            for (c = 0; c < 256; c++)
726              if (c / 8 < *p
727                  && (p[1 + (c/8)] & (1 << (c % 8))))
728                {
729                  /* Are we starting a range?  */
730                  if (last + 1 == c && ! in_range)
731                    {
732                      putchar ('-');
733                      in_range = 1;
734                    }
735                  /* Have we broken a range?  */
736                  else if (last + 1 != c && in_range)
737              {
738                      putchar (last);
739                      in_range = 0;
740                    }
741
742                  if (! in_range)
743                    putchar (c);
744
745                  last = c;
746              }
747
748            if (in_range)
749              putchar (last);
750
751            putchar (']');
752
753            p += 1 + *p;
754          }
755          break;
756
757        case begline:
758          printf ("/begline");
759          break;
760
761        case endline:
762          printf ("/endline");
763          break;
764
765        case on_failure_jump:
766          extract_number_and_incr (&mcnt, &p);
767          printf ("/on_failure_jump to %d", p + mcnt - start);
768          break;
769
770        case on_failure_keep_string_jump:
771          extract_number_and_incr (&mcnt, &p);
772          printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
773          break;
774
775        case dummy_failure_jump:
776          extract_number_and_incr (&mcnt, &p);
777          printf ("/dummy_failure_jump to %d", p + mcnt - start);
778          break;
779
780        case push_dummy_failure:
781          printf ("/push_dummy_failure");
782          break;
783
784        case maybe_pop_jump:
785          extract_number_and_incr (&mcnt, &p);
786          printf ("/maybe_pop_jump to %d", p + mcnt - start);
787          break;
788
789        case pop_failure_jump:
790          extract_number_and_incr (&mcnt, &p);
791          printf ("/pop_failure_jump to %d", p + mcnt - start);
792          break;
793
794        case jump_past_alt:
795          extract_number_and_incr (&mcnt, &p);
796          printf ("/jump_past_alt to %d", p + mcnt - start);
797          break;
798
799        case jump:
800          extract_number_and_incr (&mcnt, &p);
801          printf ("/jump to %d", p + mcnt - start);
802          break;
803
804        case succeed_n:
805          extract_number_and_incr (&mcnt, &p);
806          p1 = p + mcnt;
807          extract_number_and_incr (&mcnt2, &p);
808          printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
809          break;
810
811        case jump_n:
812          extract_number_and_incr (&mcnt, &p);
813          p1 = p + mcnt;
814          extract_number_and_incr (&mcnt2, &p);
815          printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
816          break;
817
818        case set_number_at:
819          extract_number_and_incr (&mcnt, &p);
820          p1 = p + mcnt;
821          extract_number_and_incr (&mcnt2, &p);
822          printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
823          break;
824
825        case wordbound:
826          printf ("/wordbound");
827          break;
828
829        case notwordbound:
830          printf ("/notwordbound");
831          break;
832
833        case wordbeg:
834          printf ("/wordbeg");
835          break;
836
837        case wordend:
838          printf ("/wordend");
839
840# ifdef emacs
841        case before_dot:
842          printf ("/before_dot");
843          break;
844
845        case at_dot:
846          printf ("/at_dot");
847          break;
848
849        case after_dot:
850          printf ("/after_dot");
851          break;
852
853        case syntaxspec:
854          printf ("/syntaxspec");
855          mcnt = *p++;
856          printf ("/%d", mcnt);
857          break;
858
859        case notsyntaxspec:
860          printf ("/notsyntaxspec");
861          mcnt = *p++;
862          printf ("/%d", mcnt);
863          break;
864# endif /* emacs */
865
866        case wordchar:
867          printf ("/wordchar");
868          break;
869
870        case notwordchar:
871          printf ("/notwordchar");
872          break;
873
874        case begbuf:
875          printf ("/begbuf");
876          break;
877
878        case endbuf:
879          printf ("/endbuf");
880          break;
881
882        default:
883          printf ("?%d", *(p-1));
884        }
885
886      putchar ('\n');
887    }
888
889  printf ("%d:\tend of pattern.\n", p - start);
890}
891
892
893void
894print_compiled_pattern (bufp)
895    struct re_pattern_buffer *bufp;
896{
897  unsigned char *buffer = bufp->buffer;
898
899  print_partial_compiled_pattern (buffer, buffer + bufp->used);
900  printf ("%ld bytes used/%ld bytes allocated.\n",
901          bufp->used, bufp->allocated);
902
903  if (bufp->fastmap_accurate && bufp->fastmap)
904    {
905      printf ("fastmap: ");
906      print_fastmap (bufp->fastmap);
907    }
908
909  printf ("re_nsub: %d\t", bufp->re_nsub);
910  printf ("regs_alloc: %d\t", bufp->regs_allocated);
911  printf ("can_be_null: %d\t", bufp->can_be_null);
912  printf ("newline_anchor: %d\n", bufp->newline_anchor);
913  printf ("no_sub: %d\t", bufp->no_sub);
914  printf ("not_bol: %d\t", bufp->not_bol);
915  printf ("not_eol: %d\t", bufp->not_eol);
916  printf ("syntax: %lx\n", bufp->syntax);
917  /* Perhaps we should print the translate table?  */
918}
919
920
921void
922print_double_string (where, string1, size1, string2, size2)
923    const char *where;
924    const char *string1;
925    const char *string2;
926    int size1;
927    int size2;
928{
929  int this_char;
930
931  if (where == NULL)
932    printf ("(null)");
933  else
934    {
935      if (FIRST_STRING_P (where))
936        {
937          for (this_char = where - string1; this_char < size1; this_char++)
938            putchar (string1[this_char]);
939
940          where = string2;
941        }
942
943      for (this_char = where - string2; this_char < size2; this_char++)
944        putchar (string2[this_char]);
945    }
946}
947
948void
949printchar (c)
950     int c;
951{
952  putc (c, stderr);
953}
954
955#else /* not DEBUG */
956
957# undef assert
958# define assert(e)
959
960# define DEBUG_STATEMENT(e)
961# define DEBUG_PRINT1(x)
962# define DEBUG_PRINT2(x1, x2)
963# define DEBUG_PRINT3(x1, x2, x3)
964# define DEBUG_PRINT4(x1, x2, x3, x4)
965# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
966# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
967
968#endif /* not DEBUG */
969
970/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
971   also be assigned to arbitrarily: each pattern buffer stores its own
972   syntax, so it can be changed between regex compilations.  */
973/* This has no initializer because initialized variables in Emacs
974   become read-only after dumping.  */
975reg_syntax_t re_syntax_options;
976
977
978/* Specify the precise syntax of regexps for compilation.  This provides
979   for compatibility for various utilities which historically have
980   different, incompatible syntaxes.
981
982   The argument SYNTAX is a bit mask comprised of the various bits
983   defined in regex-gnu.h.  We return the old syntax.  */
984
985reg_syntax_t
986re_set_syntax (syntax)
987    reg_syntax_t syntax;
988{
989  reg_syntax_t ret = re_syntax_options;
990
991  re_syntax_options = syntax;
992#ifdef DEBUG
993  if (syntax & RE_DEBUG)
994    debug = 1;
995  else if (debug) /* was on but now is not */
996    debug = 0;
997#endif /* DEBUG */
998  return ret;
999}
1000#ifdef _LIBC
1001weak_alias (__re_set_syntax, re_set_syntax)
1002#endif
1003
1004/* This table gives an error message for each of the error codes listed
1005   in regex-gnu.h.  Obviously the order here has to be same as there.
1006   POSIX doesn't require that we do anything for REG_NOERROR,
1007   but why not be nice?  */
1008
1009static const char *re_error_msgid[] =
1010  {
1011    gettext_noop ("Success"),   /* REG_NOERROR */
1012    gettext_noop ("No match"),  /* REG_NOMATCH */
1013    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1014    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1015    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1016    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1017    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1018    gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1019    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1020    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1021    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1022    gettext_noop ("Invalid range end"), /* REG_ERANGE */
1023    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1024    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1025    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1026    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1027    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1028  };
1029
1030/* Avoiding alloca during matching, to placate r_alloc.  */
1031
1032/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1033   searching and matching functions should not call alloca.  On some
1034   systems, alloca is implemented in terms of malloc, and if we're
1035   using the relocating allocator routines, then malloc could cause a
1036   relocation, which might (if the strings being searched are in the
1037   ralloc heap) shift the data out from underneath the regexp
1038   routines.
1039
1040   Here's another reason to avoid allocation: Emacs
1041   processes input from X in a signal handler; processing X input may
1042   call malloc; if input arrives while a matching routine is calling
1043   malloc, then we're scrod.  But Emacs can't just block input while
1044   calling matching routines; then we don't notice interrupts when
1045   they come in.  So, Emacs blocks input around all regexp calls
1046   except the matching calls, which it leaves unprotected, in the
1047   faith that they will not malloc.  */
1048
1049/* Normally, this is fine.  */
1050#define MATCH_MAY_ALLOCATE
1051
1052/* When using GNU C, we are not REALLY using the C alloca, no matter
1053   what config.h may say.  So don't take precautions for it.  */
1054#ifdef __GNUC__
1055# undef C_ALLOCA
1056#endif
1057
1058/* The match routines may not allocate if (1) they would do it with malloc
1059   and (2) it's not safe for them to use malloc.
1060   Note that if REL_ALLOC is defined, matching would not use malloc for the
1061   failure stack, but we would still use it for the register vectors;
1062   so REL_ALLOC should not affect this.  */
1063#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1064# undef MATCH_MAY_ALLOCATE
1065#endif
1066
1067
1068/* Failure stack declarations and macros; both re_compile_fastmap and
1069   re_match_2 use a failure stack.  These have to be macros because of
1070   REGEX_ALLOCATE_STACK.  */
1071
1072
1073/* Number of failure points for which to initially allocate space
1074   when matching.  If this number is exceeded, we allocate more
1075   space, so it is not a hard limit.  */
1076#ifndef INIT_FAILURE_ALLOC
1077# define INIT_FAILURE_ALLOC 5
1078#endif
1079
1080/* Roughly the maximum number of failure points on the stack.  Would be
1081   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1082   This is a variable only so users of regex can assign to it; we never
1083   change it ourselves.  */
1084
1085#ifdef INT_IS_16BIT
1086
1087# if defined MATCH_MAY_ALLOCATE
1088/* 4400 was enough to cause a crash on Alpha OSF/1,
1089   whose default stack limit is 2mb.  */
1090long int re_max_failures = 4000;
1091# else
1092long int re_max_failures = 2000;
1093# endif
1094
1095union fail_stack_elt
1096{
1097  unsigned char *pointer;
1098  long int integer;
1099};
1100
1101typedef union fail_stack_elt fail_stack_elt_t;
1102
1103typedef struct
1104{
1105  fail_stack_elt_t *stack;
1106  unsigned long int size;
1107  unsigned long int avail;              /* Offset of next open position.  */
1108} fail_stack_type;
1109
1110#else /* not INT_IS_16BIT */
1111
1112# if defined MATCH_MAY_ALLOCATE
1113/* 4400 was enough to cause a crash on Alpha OSF/1,
1114   whose default stack limit is 2mb.  */
1115int re_max_failures = 20000;
1116# else
1117int re_max_failures = 2000;
1118# endif
1119
1120union fail_stack_elt
1121{
1122  unsigned char *pointer;
1123  int integer;
1124};
1125
1126typedef union fail_stack_elt fail_stack_elt_t;
1127
1128typedef struct
1129{
1130  fail_stack_elt_t *stack;
1131  unsigned size;
1132  unsigned avail;                       /* Offset of next open position.  */
1133} fail_stack_type;
1134
1135#endif /* INT_IS_16BIT */
1136
1137#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1138#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1139#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1140
1141
1142/* Define macros to initialize and free the failure stack.
1143   Do `return -2' if the alloc fails.  */
1144
1145#ifdef MATCH_MAY_ALLOCATE
1146# define INIT_FAIL_STACK()                                              \
1147  do {                                                                  \
1148    fail_stack.stack = (fail_stack_elt_t *)                             \
1149      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1150                                                                        \
1151    if (fail_stack.stack == NULL)                                       \
1152      return -2;                                                        \
1153                                                                        \
1154    fail_stack.size = INIT_FAILURE_ALLOC;                               \
1155    fail_stack.avail = 0;                                               \
1156  } while (0)
1157
1158# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1159#else
1160# define INIT_FAIL_STACK()                                              \
1161  do {                                                                  \
1162    fail_stack.avail = 0;                                               \
1163  } while (0)
1164
1165# define RESET_FAIL_STACK()
1166#endif
1167
1168
1169/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1170
1171   Return 1 if succeeds, and 0 if either ran out of memory
1172   allocating space for it or it was already too large.
1173
1174   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1175
1176#define DOUBLE_FAIL_STACK(fail_stack)                                   \
1177  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
1178   ? 0                                                                  \
1179   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1180        REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1181          (fail_stack).size * sizeof (fail_stack_elt_t),                \
1182          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1183                                                                        \
1184      (fail_stack).stack == NULL                                        \
1185      ? 0                                                               \
1186      : ((fail_stack).size <<= 1,                                       \
1187         1)))
1188
1189
1190/* Push pointer POINTER on FAIL_STACK.
1191   Return 1 if was able to do so and 0 if ran out of memory allocating
1192   space to do so.  */
1193#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1194  ((FAIL_STACK_FULL ()                                                  \
1195    && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1196   ? 0                                                                  \
1197   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1198      1))
1199
1200/* Push a pointer value onto the failure stack.
1201   Assumes the variable `fail_stack'.  Probably should only
1202   be called from within `PUSH_FAILURE_POINT'.  */
1203#define PUSH_FAILURE_POINTER(item)                                      \
1204  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1205
1206/* This pushes an integer-valued item onto the failure stack.
1207   Assumes the variable `fail_stack'.  Probably should only
1208   be called from within `PUSH_FAILURE_POINT'.  */
1209#define PUSH_FAILURE_INT(item)                                  \
1210  fail_stack.stack[fail_stack.avail++].integer = (item)
1211
1212/* Push a fail_stack_elt_t value onto the failure stack.
1213   Assumes the variable `fail_stack'.  Probably should only
1214   be called from within `PUSH_FAILURE_POINT'.  */
1215#define PUSH_FAILURE_ELT(item)                                  \
1216  fail_stack.stack[fail_stack.avail++] =  (item)
1217
1218/* These three POP... operations complement the three PUSH... operations.
1219   All assume that `fail_stack' is nonempty.  */
1220#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1221#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1222#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1223
1224/* Used to omit pushing failure point id's when we're not debugging.  */
1225#ifdef DEBUG
1226# define DEBUG_PUSH PUSH_FAILURE_INT
1227# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1228#else
1229# define DEBUG_PUSH(item)
1230# define DEBUG_POP(item_addr)
1231#endif
1232
1233
1234/* Push the information about the state we will need
1235   if we ever fail back to it.
1236
1237   Requires variables fail_stack, regstart, regend, reg_info, and
1238   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1239   be declared.
1240
1241   Does `return FAILURE_CODE' if runs out of memory.  */
1242
1243#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1244  do {                                                                  \
1245    char *destination;                                                  \
1246    /* Must be int, so when we don't save any registers, the arithmetic \
1247       of 0 + -1 isn't done as unsigned.  */                            \
1248    /* Can't be int, since there is not a shred of a guarantee that int \
1249       is wide enough to hold a value of something to which pointer can \
1250       be assigned */                                                   \
1251    active_reg_t this_reg;                                              \
1252                                                                        \
1253    DEBUG_STATEMENT (failure_id++);                                     \
1254    DEBUG_STATEMENT (nfailure_points_pushed++);                         \
1255    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
1256    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1257    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1258                                                                        \
1259    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);          \
1260    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
1261                                                                        \
1262    /* Ensure we have enough space allocated for what we will push.  */ \
1263    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
1264      {                                                                 \
1265        if (!DOUBLE_FAIL_STACK (fail_stack))                            \
1266          return failure_code;                                          \
1267                                                                        \
1268        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
1269                       (fail_stack).size);                              \
1270        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1271      }                                                                 \
1272                                                                        \
1273    /* Push the info, starting with the registers.  */                  \
1274    DEBUG_PRINT1 ("\n");                                                \
1275                                                                        \
1276    if (1)                                                              \
1277      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1278           this_reg++)                                                  \
1279        {                                                               \
1280          DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);              \
1281          DEBUG_STATEMENT (num_regs_pushed++);                          \
1282                                                                        \
1283          DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);         \
1284          PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
1285                                                                        \
1286          DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);             \
1287          PUSH_FAILURE_POINTER (regend[this_reg]);                      \
1288                                                                        \
1289          DEBUG_PRINT2 ("    info: %p\n      ",                         \
1290                        reg_info[this_reg].word.pointer);               \
1291          DEBUG_PRINT2 (" match_null=%d",                               \
1292                        REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
1293          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
1294          DEBUG_PRINT2 (" matched_something=%d",                        \
1295                        MATCHED_SOMETHING (reg_info[this_reg]));        \
1296          DEBUG_PRINT2 (" ever_matched=%d",                             \
1297                        EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
1298          DEBUG_PRINT1 ("\n");                                          \
1299          PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
1300        }                                                               \
1301                                                                        \
1302    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1303    PUSH_FAILURE_INT (lowest_active_reg);                               \
1304                                                                        \
1305    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1306    PUSH_FAILURE_INT (highest_active_reg);                              \
1307                                                                        \
1308    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);            \
1309    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
1310    PUSH_FAILURE_POINTER (pattern_place);                               \
1311                                                                        \
1312    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);              \
1313    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1314                                 size2);                                \
1315    DEBUG_PRINT1 ("'\n");                                               \
1316    PUSH_FAILURE_POINTER (string_place);                                \
1317                                                                        \
1318    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
1319    DEBUG_PUSH (failure_id);                                            \
1320  } while (0)
1321
1322/* This is the number of items that are pushed and popped on the stack
1323   for each register.  */
1324#define NUM_REG_ITEMS  3
1325
1326/* Individual items aside from the registers.  */
1327#ifdef DEBUG
1328# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1329#else
1330# define NUM_NONREG_ITEMS 4
1331#endif
1332
1333/* We push at most this many items on the stack.  */
1334/* We used to use (num_regs - 1), which is the number of registers
1335   this regexp will save; but that was changed to 5
1336   to avoid stack overflow for a regexp with lots of parens.  */
1337#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1338
1339/* We actually push this many items.  */
1340#define NUM_FAILURE_ITEMS                               \
1341  (((0                                                  \
1342     ? 0 : highest_active_reg - lowest_active_reg + 1)  \
1343    * NUM_REG_ITEMS)                                    \
1344   + NUM_NONREG_ITEMS)
1345
1346/* How many items can still be added to the stack without overflowing it.  */
1347#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1348
1349
1350/* Pops what PUSH_FAIL_STACK pushes.
1351
1352   We restore into the parameters, all of which should be lvalues:
1353     STR -- the saved data position.
1354     PAT -- the saved pattern position.
1355     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1356     REGSTART, REGEND -- arrays of string positions.
1357     REG_INFO -- array of information about each subexpression.
1358
1359   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1360   `pend', `string1', `size1', `string2', and `size2'.  */
1361
1362#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1363{                                                                       \
1364  DEBUG_STATEMENT (unsigned failure_id;)                                \
1365  active_reg_t this_reg;                                                \
1366  const unsigned char *string_temp;                                     \
1367                                                                        \
1368  assert (!FAIL_STACK_EMPTY ());                                        \
1369                                                                        \
1370  /* Remove failure points and point to how many regs pushed.  */       \
1371  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1372  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1373  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1374                                                                        \
1375  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
1376                                                                        \
1377  DEBUG_POP (&failure_id);                                              \
1378  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
1379                                                                        \
1380  /* If the saved string location is NULL, it came from an              \
1381     on_failure_keep_string_jump opcode, and we want to throw away the  \
1382     saved NULL, thus retaining our current position in the string.  */ \
1383  string_temp = POP_FAILURE_POINTER ();                                 \
1384  if (string_temp != NULL)                                              \
1385    str = (const char *) string_temp;                                   \
1386                                                                        \
1387  DEBUG_PRINT2 ("  Popping string %p: `", str);                         \
1388  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1389  DEBUG_PRINT1 ("'\n");                                                 \
1390                                                                        \
1391  pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
1392  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);                        \
1393  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1394                                                                        \
1395  /* Restore register info.  */                                         \
1396  high_reg = (active_reg_t) POP_FAILURE_INT ();                         \
1397  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);          \
1398                                                                        \
1399  low_reg = (active_reg_t) POP_FAILURE_INT ();                          \
1400  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);           \
1401                                                                        \
1402  if (1)                                                                \
1403    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)          \
1404      {                                                                 \
1405        DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);              \
1406                                                                        \
1407        reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
1408        DEBUG_PRINT2 ("      info: %p\n",                               \
1409                      reg_info[this_reg].word.pointer);                 \
1410                                                                        \
1411        regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
1412        DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);             \
1413                                                                        \
1414        regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
1415        DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);         \
1416      }                                                                 \
1417  else                                                                  \
1418    {                                                                   \
1419      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1420        {                                                               \
1421          reg_info[this_reg].word.integer = 0;                          \
1422          regend[this_reg] = 0;                                         \
1423          regstart[this_reg] = 0;                                       \
1424        }                                                               \
1425      highest_active_reg = high_reg;                                    \
1426    }                                                                   \
1427                                                                        \
1428  set_regs_matched_done = 0;                                            \
1429  DEBUG_STATEMENT (nfailure_points_popped++);                           \
1430} /* POP_FAILURE_POINT */
1431
1432
1433
1434/* Structure for per-register (a.k.a. per-group) information.
1435   Other register information, such as the
1436   starting and ending positions (which are addresses), and the list of
1437   inner groups (which is a bits list) are maintained in separate
1438   variables.
1439
1440   We are making a (strictly speaking) nonportable assumption here: that
1441   the compiler will pack our bit fields into something that fits into
1442   the type of `word', i.e., is something that fits into one item on the
1443   failure stack.  */
1444
1445
1446/* Declarations and macros for re_match_2.  */
1447
1448typedef union
1449{
1450  fail_stack_elt_t word;
1451  struct
1452  {
1453      /* This field is one if this group can match the empty string,
1454         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1455#define MATCH_NULL_UNSET_VALUE 3
1456    unsigned match_null_string_p : 2;
1457    unsigned is_active : 1;
1458    unsigned matched_something : 1;
1459    unsigned ever_matched_something : 1;
1460  } bits;
1461} register_info_type;
1462
1463#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1464#define IS_ACTIVE(R)  ((R).bits.is_active)
1465#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1466#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1467
1468
1469/* Call this when have matched a real character; it sets `matched' flags
1470   for the subexpressions which we are currently inside.  Also records
1471   that those subexprs have matched.  */
1472#define SET_REGS_MATCHED()                                              \
1473  do                                                                    \
1474    {                                                                   \
1475      if (!set_regs_matched_done)                                       \
1476        {                                                               \
1477          active_reg_t r;                                               \
1478          set_regs_matched_done = 1;                                    \
1479          for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1480            {                                                           \
1481              MATCHED_SOMETHING (reg_info[r])                           \
1482                = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1483                = 1;                                                    \
1484            }                                                           \
1485        }                                                               \
1486    }                                                                   \
1487  while (0)
1488
1489/* Registers are set to a sentinel when they haven't yet matched.  */
1490static char reg_unset_dummy;
1491#define REG_UNSET_VALUE (&reg_unset_dummy)
1492#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1493
1494/* Subroutine declarations and macros for regex_compile.  */
1495
1496static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1497                                              reg_syntax_t syntax,
1498                                              struct re_pattern_buffer *bufp));
1499static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1500static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1501                                 int arg1, int arg2));
1502static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1503                                  int arg, unsigned char *end));
1504static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1505                                  int arg1, int arg2, unsigned char *end));
1506static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1507                                           reg_syntax_t syntax));
1508static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1509                                           reg_syntax_t syntax));
1510static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1511                                              const char *pend,
1512                                              char *translate,
1513                                              reg_syntax_t syntax,
1514                                              unsigned char *b));
1515
1516/* Fetch the next character in the uncompiled pattern---translating it
1517   if necessary.  Also cast from a signed character in the constant
1518   string passed to us by the user to an unsigned char that we can use
1519   as an array index (in, e.g., `translate').  */
1520#ifndef PATFETCH
1521# define PATFETCH(c)                                                    \
1522  do {if (p == pend) return REG_EEND;                                   \
1523    c = (unsigned char) *p++;                                           \
1524    if (translate) c = (unsigned char) translate[c];                    \
1525  } while (0)
1526#endif
1527
1528/* Fetch the next character in the uncompiled pattern, with no
1529   translation.  */
1530#define PATFETCH_RAW(c)                                                 \
1531  do {if (p == pend) return REG_EEND;                                   \
1532    c = (unsigned char) *p++;                                           \
1533  } while (0)
1534
1535/* Go backwards one character in the pattern.  */
1536#define PATUNFETCH p--
1537
1538
1539/* If `translate' is non-null, return translate[D], else just D.  We
1540   cast the subscript to translate because some data is declared as
1541   `char *', to avoid warnings when a string constant is passed.  But
1542   when we use a character as a subscript we must make it unsigned.  */
1543#ifndef TRANSLATE
1544# define TRANSLATE(d) \
1545  (translate ? (char) translate[(unsigned char) (d)] : (d))
1546#endif
1547
1548
1549/* Macros for outputting the compiled pattern into `buffer'.  */
1550
1551/* If the buffer isn't allocated when it comes in, use this.  */
1552#define INIT_BUF_SIZE  32
1553
1554/* Make sure we have at least N more bytes of space in buffer.  */
1555#define GET_BUFFER_SPACE(n)                                             \
1556    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
1557      EXTEND_BUFFER ()
1558
1559/* Make sure we have one more byte of buffer space and then add C to it.  */
1560#define BUF_PUSH(c)                                                     \
1561  do {                                                                  \
1562    GET_BUFFER_SPACE (1);                                               \
1563    *b++ = (unsigned char) (c);                                         \
1564  } while (0)
1565
1566
1567/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1568#define BUF_PUSH_2(c1, c2)                                              \
1569  do {                                                                  \
1570    GET_BUFFER_SPACE (2);                                               \
1571    *b++ = (unsigned char) (c1);                                        \
1572    *b++ = (unsigned char) (c2);                                        \
1573  } while (0)
1574
1575
1576/* As with BUF_PUSH_2, except for three bytes.  */
1577#define BUF_PUSH_3(c1, c2, c3)                                          \
1578  do {                                                                  \
1579    GET_BUFFER_SPACE (3);                                               \
1580    *b++ = (unsigned char) (c1);                                        \
1581    *b++ = (unsigned char) (c2);                                        \
1582    *b++ = (unsigned char) (c3);                                        \
1583  } while (0)
1584
1585
1586/* Store a jump with opcode OP at LOC to location TO.  We store a
1587   relative address offset by the three bytes the jump itself occupies.  */
1588#define STORE_JUMP(op, loc, to) \
1589  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1590
1591/* Likewise, for a two-argument jump.  */
1592#define STORE_JUMP2(op, loc, to, arg) \
1593  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1594
1595/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1596#define INSERT_JUMP(op, loc, to) \
1597  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1598
1599/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1600#define INSERT_JUMP2(op, loc, to, arg) \
1601  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1602
1603
1604/* This is not an arbitrary limit: the arguments which represent offsets
1605   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1606   be too small, many things would have to change.  */
1607/* Any other compiler which, like MSC, has allocation limit below 2^16
1608   bytes will have to use approach similar to what was done below for
1609   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1610   reallocating to 0 bytes.  Such thing is not going to work too well.
1611   You have been warned!!  */
1612#if defined _MSC_VER  && !defined WIN32
1613/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1614   The REALLOC define eliminates a flurry of conversion warnings,
1615   but is not required. */
1616# define MAX_BUF_SIZE  65500L
1617# define REALLOC(p,s) realloc ((p), (size_t) (s))
1618#else
1619# define MAX_BUF_SIZE (1L << 16)
1620# define REALLOC(p,s) realloc ((p), (s))
1621#endif
1622
1623/* Extend the buffer by twice its current size via realloc and
1624   reset the pointers that pointed into the old block to point to the
1625   correct places in the new one.  If extending the buffer results in it
1626   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1627#define EXTEND_BUFFER()                                                 \
1628  do {                                                                  \
1629    unsigned char *old_buffer = bufp->buffer;                           \
1630    if (bufp->allocated == MAX_BUF_SIZE)                                \
1631      return REG_ESIZE;                                                 \
1632    bufp->allocated <<= 1;                                              \
1633    if (bufp->allocated > MAX_BUF_SIZE)                                 \
1634      bufp->allocated = MAX_BUF_SIZE;                                   \
1635    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1636    if (bufp->buffer == NULL)                                           \
1637      return REG_ESPACE;                                                \
1638    /* If the buffer moved, move all the pointers into it.  */          \
1639    if (old_buffer != bufp->buffer)                                     \
1640      {                                                                 \
1641        b = (b - old_buffer) + bufp->buffer;                            \
1642        begalt = (begalt - old_buffer) + bufp->buffer;                  \
1643        if (fixup_alt_jump)                                             \
1644          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1645        if (laststart)                                                  \
1646          laststart = (laststart - old_buffer) + bufp->buffer;          \
1647        if (pending_exact)                                              \
1648          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
1649      }                                                                 \
1650  } while (0)
1651
1652
1653/* Since we have one byte reserved for the register number argument to
1654   {start,stop}_memory, the maximum number of groups we can report
1655   things about is what fits in that byte.  */
1656#define MAX_REGNUM 255
1657
1658/* But patterns can have more than `MAX_REGNUM' registers.  We just
1659   ignore the excess.  */
1660typedef unsigned regnum_t;
1661
1662
1663/* Macros for the compile stack.  */
1664
1665/* Since offsets can go either forwards or backwards, this type needs to
1666   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1667/* int may be not enough when sizeof(int) == 2.  */
1668typedef long pattern_offset_t;
1669
1670typedef struct
1671{
1672  pattern_offset_t begalt_offset;
1673  pattern_offset_t fixup_alt_jump;
1674  pattern_offset_t inner_group_offset;
1675  pattern_offset_t laststart_offset;
1676  regnum_t regnum;
1677} compile_stack_elt_t;
1678
1679
1680typedef struct
1681{
1682  compile_stack_elt_t *stack;
1683  unsigned size;
1684  unsigned avail;                       /* Offset of next open position.  */
1685} compile_stack_type;
1686
1687
1688#define INIT_COMPILE_STACK_SIZE 32
1689
1690#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1691#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1692
1693/* The next available element.  */
1694#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1695
1696
1697/* Set the bit for character C in a list.  */
1698#define SET_LIST_BIT(c)                               \
1699  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1700   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1701
1702
1703/* Get the next unsigned number in the uncompiled pattern.  */
1704#define GET_UNSIGNED_NUMBER(num)                                        \
1705  { if (p != pend)                                                      \
1706     {                                                                  \
1707       PATFETCH (c);                                                    \
1708       while (ISDIGIT (c))                                              \
1709         {                                                              \
1710           if (num < 0)                                                 \
1711              num = 0;                                                  \
1712           num = num * 10 + c - '0';                                    \
1713           if (p == pend)                                               \
1714              break;                                                    \
1715           PATFETCH (c);                                                \
1716         }                                                              \
1717       }                                                                \
1718    }
1719
1720#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1721/* The GNU C library provides support for user-defined character classes
1722   and the functions from ISO C amendement 1.  */
1723# ifdef CHARCLASS_NAME_MAX
1724#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1725# else
1726/* This shouldn't happen but some implementation might still have this
1727   problem.  Use a reasonable default value.  */
1728#  define CHAR_CLASS_MAX_LENGTH 256
1729# endif
1730
1731# ifdef _LIBC
1732#  define IS_CHAR_CLASS(string) __wctype (string)
1733# else
1734#  define IS_CHAR_CLASS(string) wctype (string)
1735# endif
1736#else
1737# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1738
1739# define IS_CHAR_CLASS(string)                                          \
1740   (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1741    || STREQ (string, "lower") || STREQ (string, "digit")               \
1742    || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1743    || STREQ (string, "space") || STREQ (string, "print")               \
1744    || STREQ (string, "punct") || STREQ (string, "graph")               \
1745    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1746#endif
1747
1748#ifndef MATCH_MAY_ALLOCATE
1749
1750/* If we cannot allocate large objects within re_match_2_internal,
1751   we make the fail stack and register vectors global.
1752   The fail stack, we grow to the maximum size when a regexp
1753   is compiled.
1754   The register vectors, we adjust in size each time we
1755   compile a regexp, according to the number of registers it needs.  */
1756
1757static fail_stack_type fail_stack;
1758
1759/* Size with which the following vectors are currently allocated.
1760   That is so we can make them bigger as needed,
1761   but never make them smaller.  */
1762static int regs_allocated_size;
1763
1764static const char **     regstart, **     regend;
1765static const char ** old_regstart, ** old_regend;
1766static const char **best_regstart, **best_regend;
1767static register_info_type *reg_info;
1768static const char **reg_dummy;
1769static register_info_type *reg_info_dummy;
1770
1771/* Make the register vectors big enough for NUM_REGS registers,
1772   but don't make them smaller.  */
1773
1774static
1775regex_grow_registers (num_regs)
1776     int num_regs;
1777{
1778  if (num_regs > regs_allocated_size)
1779    {
1780      RETALLOC_IF (regstart,     num_regs, const char *);
1781      RETALLOC_IF (regend,       num_regs, const char *);
1782      RETALLOC_IF (old_regstart, num_regs, const char *);
1783      RETALLOC_IF (old_regend,   num_regs, const char *);
1784      RETALLOC_IF (best_regstart, num_regs, const char *);
1785      RETALLOC_IF (best_regend,  num_regs, const char *);
1786      RETALLOC_IF (reg_info,     num_regs, register_info_type);
1787      RETALLOC_IF (reg_dummy,    num_regs, const char *);
1788      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1789
1790      regs_allocated_size = num_regs;
1791    }
1792}
1793
1794#endif /* not MATCH_MAY_ALLOCATE */
1795
1796static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1797                                                 compile_stack,
1798                                                 regnum_t regnum));
1799
1800/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1801   Returns one of error codes defined in `regex-gnu.h', or zero for success.
1802
1803   Assumes the `allocated' (and perhaps `buffer') and `translate'
1804   fields are set in BUFP on entry.
1805
1806   If it succeeds, results are put in BUFP (if it returns an error, the
1807   contents of BUFP are undefined):
1808     `buffer' is the compiled pattern;
1809     `syntax' is set to SYNTAX;
1810     `used' is set to the length of the compiled pattern;
1811     `fastmap_accurate' is zero;
1812     `re_nsub' is the number of subexpressions in PATTERN;
1813     `not_bol' and `not_eol' are zero;
1814
1815   The `fastmap' and `newline_anchor' fields are neither
1816   examined nor set.  */
1817
1818/* Return, freeing storage we allocated.  */
1819#define FREE_STACK_RETURN(value)                \
1820  return (free (compile_stack.stack), value)
1821
1822static reg_errcode_t
1823regex_compile (pattern, size, syntax, bufp)
1824     const char *pattern;
1825     size_t size;
1826     reg_syntax_t syntax;
1827     struct re_pattern_buffer *bufp;
1828{
1829  /* We fetch characters from PATTERN here.  Even though PATTERN is
1830     `char *' (i.e., signed), we declare these variables as unsigned, so
1831     they can be reliably used as array indices.  */
1832  register unsigned char c, c1;
1833
1834  /* A random temporary spot in PATTERN.  */
1835  const char *p1;
1836
1837  /* Points to the end of the buffer, where we should append.  */
1838  register unsigned char *b;
1839
1840  /* Keeps track of unclosed groups.  */
1841  compile_stack_type compile_stack;
1842
1843  /* Points to the current (ending) position in the pattern.  */
1844  const char *p = pattern;
1845  const char *pend = pattern + size;
1846
1847  /* How to translate the characters in the pattern.  */
1848  RE_TRANSLATE_TYPE translate = bufp->translate;
1849
1850  /* Address of the count-byte of the most recently inserted `exactn'
1851     command.  This makes it possible to tell if a new exact-match
1852     character can be added to that command or if the character requires
1853     a new `exactn' command.  */
1854  unsigned char *pending_exact = 0;
1855
1856  /* Address of start of the most recently finished expression.
1857     This tells, e.g., postfix * where to find the start of its
1858     operand.  Reset at the beginning of groups and alternatives.  */
1859  unsigned char *laststart = 0;
1860
1861  /* Address of beginning of regexp, or inside of last group.  */
1862  unsigned char *begalt;
1863
1864  /* Place in the uncompiled pattern (i.e., the {) to
1865     which to go back if the interval is invalid.  */
1866  const char *beg_interval;
1867
1868  /* Address of the place where a forward jump should go to the end of
1869     the containing expression.  Each alternative of an `or' -- except the
1870     last -- ends with a forward jump of this sort.  */
1871  unsigned char *fixup_alt_jump = 0;
1872
1873  /* Counts open-groups as they are encountered.  Remembered for the
1874     matching close-group on the compile stack, so the same register
1875     number is put in the stop_memory as the start_memory.  */
1876  regnum_t regnum = 0;
1877
1878#ifdef DEBUG
1879  DEBUG_PRINT1 ("\nCompiling pattern: ");
1880  if (debug)
1881    {
1882      unsigned debug_count;
1883
1884      for (debug_count = 0; debug_count < size; debug_count++)
1885        putchar (pattern[debug_count]);
1886      putchar ('\n');
1887    }
1888#endif /* DEBUG */
1889
1890  /* Initialize the compile stack.  */
1891  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1892  if (compile_stack.stack == NULL)
1893    return REG_ESPACE;
1894
1895  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1896  compile_stack.avail = 0;
1897
1898  /* Initialize the pattern buffer.  */
1899  bufp->syntax = syntax;
1900  bufp->fastmap_accurate = 0;
1901  bufp->not_bol = bufp->not_eol = 0;
1902
1903  /* Set `used' to zero, so that if we return an error, the pattern
1904     printer (for debugging) will think there's no pattern.  We reset it
1905     at the end.  */
1906  bufp->used = 0;
1907
1908  /* Always count groups, whether or not bufp->no_sub is set.  */
1909  bufp->re_nsub = 0;
1910
1911#if !defined emacs && !defined SYNTAX_TABLE
1912  /* Initialize the syntax table.  */
1913   init_syntax_once ();
1914#endif
1915
1916  if (bufp->allocated == 0)
1917    {
1918      if (bufp->buffer)
1919        { /* If zero allocated, but buffer is non-null, try to realloc
1920             enough space.  This loses if buffer's address is bogus, but
1921             that is the user's responsibility.  */
1922          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1923        }
1924      else
1925        { /* Caller did not allocate a buffer.  Do it for them.  */
1926          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1927        }
1928      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1929
1930      bufp->allocated = INIT_BUF_SIZE;
1931    }
1932
1933  begalt = b = bufp->buffer;
1934
1935  /* Loop through the uncompiled pattern until we're at the end.  */
1936  while (p != pend)
1937    {
1938      PATFETCH (c);
1939
1940      switch (c)
1941        {
1942        case '^':
1943          {
1944            if (   /* If at start of pattern, it's an operator.  */
1945                   p == pattern + 1
1946                   /* If context independent, it's an operator.  */
1947                || syntax & RE_CONTEXT_INDEP_ANCHORS
1948                   /* Otherwise, depends on what's come before.  */
1949                || at_begline_loc_p (pattern, p, syntax))
1950              BUF_PUSH (begline);
1951            else
1952              goto normal_char;
1953          }
1954          break;
1955
1956
1957        case '$':
1958          {
1959            if (   /* If at end of pattern, it's an operator.  */
1960                   p == pend
1961                   /* If context independent, it's an operator.  */
1962                || syntax & RE_CONTEXT_INDEP_ANCHORS
1963                   /* Otherwise, depends on what's next.  */
1964                || at_endline_loc_p (p, pend, syntax))
1965               BUF_PUSH (endline);
1966             else
1967               goto normal_char;
1968           }
1969           break;
1970
1971
1972        case '+':
1973        case '?':
1974          if ((syntax & RE_BK_PLUS_QM)
1975              || (syntax & RE_LIMITED_OPS))
1976            goto normal_char;
1977        handle_plus:
1978        case '*':
1979          /* If there is no previous pattern... */
1980          if (!laststart)
1981            {
1982              if (syntax & RE_CONTEXT_INVALID_OPS)
1983                FREE_STACK_RETURN (REG_BADRPT);
1984              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1985                goto normal_char;
1986            }
1987
1988          {
1989            /* Are we optimizing this jump?  */
1990            boolean keep_string_p = false;
1991
1992            /* 1 means zero (many) matches is allowed.  */
1993            char zero_times_ok = 0, many_times_ok = 0;
1994
1995            /* If there is a sequence of repetition chars, collapse it
1996               down to just one (the right one).  We can't combine
1997               interval operators with these because of, e.g., `a{2}*',
1998               which should only match an even number of `a's.  */
1999
2000            for (;;)
2001              {
2002                zero_times_ok |= c != '+';
2003                many_times_ok |= c != '?';
2004
2005                if (p == pend)
2006                  break;
2007
2008                PATFETCH (c);
2009
2010                if (c == '*'
2011                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2012                  ;
2013
2014                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2015                  {
2016                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2017
2018                    PATFETCH (c1);
2019                    if (!(c1 == '+' || c1 == '?'))
2020                      {
2021                        PATUNFETCH;
2022                        PATUNFETCH;
2023                        break;
2024                      }
2025
2026                    c = c1;
2027                  }
2028                else
2029                  {
2030                    PATUNFETCH;
2031                    break;
2032                  }
2033
2034                /* If we get here, we found another repeat character.  */
2035               }
2036
2037            /* Star, etc. applied to an empty pattern is equivalent
2038               to an empty pattern.  */
2039            if (!laststart)
2040              break;
2041
2042            /* Now we know whether or not zero matches is allowed
2043               and also whether or not two or more matches is allowed.  */
2044            if (many_times_ok)
2045              { /* More than one repetition is allowed, so put in at the
2046                   end a backward relative jump from `b' to before the next
2047                   jump we're going to put in below (which jumps from
2048                   laststart to after this jump).
2049
2050                   But if we are at the `*' in the exact sequence `.*\n',
2051                   insert an unconditional jump backwards to the .,
2052                   instead of the beginning of the loop.  This way we only
2053                   push a failure point once, instead of every time
2054                   through the loop.  */
2055                assert (p - 1 > pattern);
2056
2057                /* Allocate the space for the jump.  */
2058                GET_BUFFER_SPACE (3);
2059
2060                /* We know we are not at the first character of the pattern,
2061                   because laststart was nonzero.  And we've already
2062                   incremented `p', by the way, to be the character after
2063                   the `*'.  Do we have to do something analogous here
2064                   for null bytes, because of RE_DOT_NOT_NULL?  */
2065                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2066                    && zero_times_ok
2067                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2068                    && !(syntax & RE_DOT_NEWLINE))
2069                  { /* We have .*\n.  */
2070                    STORE_JUMP (jump, b, laststart);
2071                    keep_string_p = true;
2072                  }
2073                else
2074                  /* Anything else.  */
2075                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2076
2077                /* We've added more stuff to the buffer.  */
2078                b += 3;
2079              }
2080
2081            /* On failure, jump from laststart to b + 3, which will be the
2082               end of the buffer after this jump is inserted.  */
2083            GET_BUFFER_SPACE (3);
2084            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2085                                       : on_failure_jump,
2086                         laststart, b + 3);
2087            pending_exact = 0;
2088            b += 3;
2089
2090            if (!zero_times_ok)
2091              {
2092                /* At least one repetition is required, so insert a
2093                   `dummy_failure_jump' before the initial
2094                   `on_failure_jump' instruction of the loop. This
2095                   effects a skip over that instruction the first time
2096                   we hit that loop.  */
2097                GET_BUFFER_SPACE (3);
2098                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2099                b += 3;
2100              }
2101            }
2102          break;
2103
2104
2105        case '.':
2106          laststart = b;
2107          BUF_PUSH (anychar);
2108          break;
2109
2110
2111        case '[':
2112          {
2113            boolean had_char_class = false;
2114
2115            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2116
2117            /* Ensure that we have enough space to push a charset: the
2118               opcode, the length count, and the bitset; 34 bytes in all.  */
2119            GET_BUFFER_SPACE (34);
2120
2121            laststart = b;
2122
2123            /* We test `*p == '^' twice, instead of using an if
2124               statement, so we only need one BUF_PUSH.  */
2125            BUF_PUSH (*p == '^' ? charset_not : charset);
2126            if (*p == '^')
2127              p++;
2128
2129            /* Remember the first position in the bracket expression.  */
2130            p1 = p;
2131
2132            /* Push the number of bytes in the bitmap.  */
2133            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2134
2135            /* Clear the whole map.  */
2136            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2137
2138            /* charset_not matches newline according to a syntax bit.  */
2139            if ((re_opcode_t) b[-2] == charset_not
2140                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2141              SET_LIST_BIT ('\n');
2142
2143            /* Read in characters and ranges, setting map bits.  */
2144            for (;;)
2145              {
2146                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2147
2148                PATFETCH (c);
2149
2150                /* \ might escape characters inside [...] and [^...].  */
2151                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2152                  {
2153                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2154
2155                    PATFETCH (c1);
2156                    SET_LIST_BIT (c1);
2157                    continue;
2158                  }
2159
2160                /* Could be the end of the bracket expression.  If it's
2161                   not (i.e., when the bracket expression is `[]' so
2162                   far), the ']' character bit gets set way below.  */
2163                if (c == ']' && p != p1 + 1)
2164                  break;
2165
2166                /* Look ahead to see if it's a range when the last thing
2167                   was a character class.  */
2168                if (had_char_class && c == '-' && *p != ']')
2169                  FREE_STACK_RETURN (REG_ERANGE);
2170
2171                /* Look ahead to see if it's a range when the last thing
2172                   was a character: if this is a hyphen not at the
2173                   beginning or the end of a list, then it's the range
2174                   operator.  */
2175                if (c == '-'
2176                    && !(p - 2 >= pattern && p[-2] == '[')
2177                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2178                    && *p != ']')
2179                  {
2180                    reg_errcode_t ret
2181                      = compile_range (&p, pend, translate, syntax, b);
2182                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2183                  }
2184
2185                else if (p[0] == '-' && p[1] != ']')
2186                  { /* This handles ranges made up of characters only.  */
2187                    reg_errcode_t ret;
2188
2189                    /* Move past the `-'.  */
2190                    PATFETCH (c1);
2191
2192                    ret = compile_range (&p, pend, translate, syntax, b);
2193                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2194                  }
2195
2196                /* See if we're at the beginning of a possible character
2197                   class.  */
2198
2199                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2200                  { /* Leave room for the null.  */
2201                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2202
2203                    PATFETCH (c);
2204                    c1 = 0;
2205
2206                    /* If pattern is `[[:'.  */
2207                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2208
2209                    for (;;)
2210                      {
2211                        PATFETCH (c);
2212                        if ((c == ':' && *p == ']') || p == pend
2213                            || c1 == CHAR_CLASS_MAX_LENGTH)
2214                          break;
2215                        str[c1++] = c;
2216                      }
2217                    str[c1] = '\0';
2218
2219                    /* If isn't a word bracketed by `[:' and `:]':
2220                       undo the ending character, the letters, and leave
2221                       the leading `:' and `[' (but set bits for them).  */
2222                    if (c == ':' && *p == ']')
2223                      {
2224#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
2225                        boolean is_lower = STREQ (str, "lower");
2226                        boolean is_upper = STREQ (str, "upper");
2227                        wctype_t wt;
2228                        int ch;
2229
2230                        wt = IS_CHAR_CLASS (str);
2231                        if (wt == 0)
2232                          FREE_STACK_RETURN (REG_ECTYPE);
2233
2234                        /* Throw away the ] at the end of the character
2235                           class.  */
2236                        PATFETCH (c);
2237
2238                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2239
2240                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2241                          {
2242# ifdef _LIBC
2243                            if (__iswctype (__btowc (ch), wt))
2244                              SET_LIST_BIT (ch);
2245#else
2246                            if (iswctype (btowc (ch), wt))
2247                              SET_LIST_BIT (ch);
2248#endif
2249
2250                            if (translate && (is_upper || is_lower)
2251                                && (ISUPPER (ch) || ISLOWER (ch)))
2252                              SET_LIST_BIT (ch);
2253                          }
2254
2255                        had_char_class = true;
2256#else
2257                        int ch;
2258                        boolean is_alnum = STREQ (str, "alnum");
2259                        boolean is_alpha = STREQ (str, "alpha");
2260                        boolean is_blank = STREQ (str, "blank");
2261                        boolean is_cntrl = STREQ (str, "cntrl");
2262                        boolean is_digit = STREQ (str, "digit");
2263                        boolean is_graph = STREQ (str, "graph");
2264                        boolean is_lower = STREQ (str, "lower");
2265                        boolean is_print = STREQ (str, "print");
2266                        boolean is_punct = STREQ (str, "punct");
2267                        boolean is_space = STREQ (str, "space");
2268                        boolean is_upper = STREQ (str, "upper");
2269                        boolean is_xdigit = STREQ (str, "xdigit");
2270
2271                        if (!IS_CHAR_CLASS (str))
2272                          FREE_STACK_RETURN (REG_ECTYPE);
2273
2274                        /* Throw away the ] at the end of the character
2275                           class.  */
2276                        PATFETCH (c);
2277
2278                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2279
2280                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2281                          {
2282                            /* This was split into 3 if's to
2283                               avoid an arbitrary limit in some compiler.  */
2284                            if (   (is_alnum  && ISALNUM (ch))
2285                                || (is_alpha  && ISALPHA (ch))
2286                                || (is_blank  && ISBLANK (ch))
2287                                || (is_cntrl  && ISCNTRL (ch)))
2288                              SET_LIST_BIT (ch);
2289                            if (   (is_digit  && ISDIGIT (ch))
2290                                || (is_graph  && ISGRAPH (ch))
2291                                || (is_lower  && ISLOWER (ch))
2292                                || (is_print  && ISPRINT (ch)))
2293                              SET_LIST_BIT (ch);
2294                            if (   (is_punct  && ISPUNCT (ch))
2295                                || (is_space  && ISSPACE (ch))
2296                                || (is_upper  && ISUPPER (ch))
2297                                || (is_xdigit && ISXDIGIT (ch)))
2298                              SET_LIST_BIT (ch);
2299                            if (   translate && (is_upper || is_lower)
2300                                && (ISUPPER (ch) || ISLOWER (ch)))
2301                              SET_LIST_BIT (ch);
2302                          }
2303                        had_char_class = true;
2304#endif  /* libc || wctype.h */
2305                      }
2306                    else
2307                      {
2308                        c1++;
2309                        while (c1--)
2310                          PATUNFETCH;
2311                        SET_LIST_BIT ('[');
2312                        SET_LIST_BIT (':');
2313                        had_char_class = false;
2314                      }
2315                  }
2316                else
2317                  {
2318                    had_char_class = false;
2319                    SET_LIST_BIT (c);
2320                  }
2321              }
2322
2323            /* Discard any (non)matching list bytes that are all 0 at the
2324               end of the map.  Decrease the map-length byte too.  */
2325            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2326              b[-1]--;
2327            b += b[-1];
2328          }
2329          break;
2330
2331
2332        case '(':
2333          if (syntax & RE_NO_BK_PARENS)
2334            goto handle_open;
2335          else
2336            goto normal_char;
2337
2338
2339        case ')':
2340          if (syntax & RE_NO_BK_PARENS)
2341            goto handle_close;
2342          else
2343            goto normal_char;
2344
2345
2346        case '\n':
2347          if (syntax & RE_NEWLINE_ALT)
2348            goto handle_alt;
2349          else
2350            goto normal_char;
2351
2352
2353        case '|':
2354          if (syntax & RE_NO_BK_VBAR)
2355            goto handle_alt;
2356          else
2357            goto normal_char;
2358
2359
2360        case '{':
2361           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2362             goto handle_interval;
2363           else
2364             goto normal_char;
2365
2366
2367        case '\\':
2368          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2369
2370          /* Do not translate the character after the \, so that we can
2371             distinguish, e.g., \B from \b, even if we normally would
2372             translate, e.g., B to b.  */
2373          PATFETCH_RAW (c);
2374
2375          switch (c)
2376            {
2377            case '(':
2378              if (syntax & RE_NO_BK_PARENS)
2379                goto normal_backslash;
2380
2381            handle_open:
2382              bufp->re_nsub++;
2383              regnum++;
2384
2385              if (COMPILE_STACK_FULL)
2386                {
2387                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2388                            compile_stack_elt_t);
2389                  if (compile_stack.stack == NULL) return REG_ESPACE;
2390
2391                  compile_stack.size <<= 1;
2392                }
2393
2394              /* These are the values to restore when we hit end of this
2395                 group.  They are all relative offsets, so that if the
2396                 whole pattern moves because of realloc, they will still
2397                 be valid.  */
2398              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2399              COMPILE_STACK_TOP.fixup_alt_jump
2400                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2401              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2402              COMPILE_STACK_TOP.regnum = regnum;
2403
2404              /* We will eventually replace the 0 with the number of
2405                 groups inner to this one.  But do not push a
2406                 start_memory for groups beyond the last one we can
2407                 represent in the compiled pattern.  */
2408              if (regnum <= MAX_REGNUM)
2409                {
2410                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2411                  BUF_PUSH_3 (start_memory, regnum, 0);
2412                }
2413
2414              compile_stack.avail++;
2415
2416              fixup_alt_jump = 0;
2417              laststart = 0;
2418              begalt = b;
2419              /* If we've reached MAX_REGNUM groups, then this open
2420                 won't actually generate any code, so we'll have to
2421                 clear pending_exact explicitly.  */
2422              pending_exact = 0;
2423              break;
2424
2425
2426            case ')':
2427              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2428
2429              if (COMPILE_STACK_EMPTY)
2430                {
2431                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2432                    goto normal_backslash;
2433                  else
2434                    FREE_STACK_RETURN (REG_ERPAREN);
2435                }
2436
2437            handle_close:
2438              if (fixup_alt_jump)
2439                { /* Push a dummy failure point at the end of the
2440                     alternative for a possible future
2441                     `pop_failure_jump' to pop.  See comments at
2442                     `push_dummy_failure' in `re_match_2'.  */
2443                  BUF_PUSH (push_dummy_failure);
2444
2445                  /* We allocated space for this jump when we assigned
2446                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2447                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2448                }
2449
2450              /* See similar code for backslashed left paren above.  */
2451              if (COMPILE_STACK_EMPTY)
2452                {
2453                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2454                    goto normal_char;
2455                  else
2456                    FREE_STACK_RETURN (REG_ERPAREN);
2457                }
2458
2459              /* Since we just checked for an empty stack above, this
2460                 ``can't happen''.  */
2461              assert (compile_stack.avail != 0);
2462              {
2463                /* We don't just want to restore into `regnum', because
2464                   later groups should continue to be numbered higher,
2465                   as in `(ab)c(de)' -- the second group is #2.  */
2466                regnum_t this_group_regnum;
2467
2468                compile_stack.avail--;
2469                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2470                fixup_alt_jump
2471                  = COMPILE_STACK_TOP.fixup_alt_jump
2472                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2473                    : 0;
2474                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2475                this_group_regnum = COMPILE_STACK_TOP.regnum;
2476                /* If we've reached MAX_REGNUM groups, then this open
2477                   won't actually generate any code, so we'll have to
2478                   clear pending_exact explicitly.  */
2479                pending_exact = 0;
2480
2481                /* We're at the end of the group, so now we know how many
2482                   groups were inside this one.  */
2483                if (this_group_regnum <= MAX_REGNUM)
2484                  {
2485                    unsigned char *inner_group_loc
2486                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2487
2488                    *inner_group_loc = regnum - this_group_regnum;
2489                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2490                                regnum - this_group_regnum);
2491                  }
2492              }
2493              break;
2494
2495
2496            case '|':                                   /* `\|'.  */
2497              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2498                goto normal_backslash;
2499            handle_alt:
2500              if (syntax & RE_LIMITED_OPS)
2501                goto normal_char;
2502
2503              /* Insert before the previous alternative a jump which
2504                 jumps to this alternative if the former fails.  */
2505              GET_BUFFER_SPACE (3);
2506              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2507              pending_exact = 0;
2508              b += 3;
2509
2510              /* The alternative before this one has a jump after it
2511                 which gets executed if it gets matched.  Adjust that
2512                 jump so it will jump to this alternative's analogous
2513                 jump (put in below, which in turn will jump to the next
2514                 (if any) alternative's such jump, etc.).  The last such
2515                 jump jumps to the correct final destination.  A picture:
2516                          _____ _____
2517                          |   | |   |
2518                          |   v |   v
2519                         a | b   | c
2520
2521                 If we are at `b', then fixup_alt_jump right now points to a
2522                 three-byte space after `a'.  We'll put in the jump, set
2523                 fixup_alt_jump to right after `b', and leave behind three
2524                 bytes which we'll fill in when we get to after `c'.  */
2525
2526              if (fixup_alt_jump)
2527                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2528
2529              /* Mark and leave space for a jump after this alternative,
2530                 to be filled in later either by next alternative or
2531                 when know we're at the end of a series of alternatives.  */
2532              fixup_alt_jump = b;
2533              GET_BUFFER_SPACE (3);
2534              b += 3;
2535
2536              laststart = 0;
2537              begalt = b;
2538              break;
2539
2540
2541            case '{':
2542              /* If \{ is a literal.  */
2543              if (!(syntax & RE_INTERVALS)
2544                     /* If we're at `\{' and it's not the open-interval
2545                        operator.  */
2546                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2547                  || (p - 2 == pattern  &&  p == pend))
2548                goto normal_backslash;
2549
2550            handle_interval:
2551              {
2552                /* If got here, then the syntax allows intervals.  */
2553
2554                /* At least (most) this many matches must be made.  */
2555                int lower_bound = -1, upper_bound = -1;
2556
2557                beg_interval = p - 1;
2558
2559                if (p == pend)
2560                  {
2561                    if (syntax & RE_NO_BK_BRACES)
2562                      goto unfetch_interval;
2563                    else
2564                      FREE_STACK_RETURN (REG_EBRACE);
2565                  }
2566
2567                GET_UNSIGNED_NUMBER (lower_bound);
2568
2569                if (c == ',')
2570                  {
2571                    GET_UNSIGNED_NUMBER (upper_bound);
2572                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2573                  }
2574                else
2575                  /* Interval such as `{1}' => match exactly once. */
2576                  upper_bound = lower_bound;
2577
2578                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2579                    || lower_bound > upper_bound)
2580                  {
2581                    if (syntax & RE_NO_BK_BRACES)
2582                      goto unfetch_interval;
2583                    else
2584                      FREE_STACK_RETURN (REG_BADBR);
2585                  }
2586
2587                if (!(syntax & RE_NO_BK_BRACES))
2588                  {
2589                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2590
2591                    PATFETCH (c);
2592                  }
2593
2594                if (c != '}')
2595                  {
2596                    if (syntax & RE_NO_BK_BRACES)
2597                      goto unfetch_interval;
2598                    else
2599                      FREE_STACK_RETURN (REG_BADBR);
2600                  }
2601
2602                /* We just parsed a valid interval.  */
2603
2604                /* If it's invalid to have no preceding re.  */
2605                if (!laststart)
2606                  {
2607                    if (syntax & RE_CONTEXT_INVALID_OPS)
2608                      FREE_STACK_RETURN (REG_BADRPT);
2609                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2610                      laststart = b;
2611                    else
2612                      goto unfetch_interval;
2613                  }
2614
2615                /* If the upper bound is zero, don't want to succeed at
2616                   all; jump from `laststart' to `b + 3', which will be
2617                   the end of the buffer after we insert the jump.  */
2618                 if (upper_bound == 0)
2619                   {
2620                     GET_BUFFER_SPACE (3);
2621                     INSERT_JUMP (jump, laststart, b + 3);
2622                     b += 3;
2623                   }
2624
2625                 /* Otherwise, we have a nontrivial interval.  When
2626                    we're all done, the pattern will look like:
2627                      set_number_at <jump count> <upper bound>
2628                      set_number_at <succeed_n count> <lower bound>
2629                      succeed_n <after jump addr> <succeed_n count>
2630                      <body of loop>
2631                      jump_n <succeed_n addr> <jump count>
2632                    (The upper bound and `jump_n' are omitted if
2633                    `upper_bound' is 1, though.)  */
2634                 else
2635                   { /* If the upper bound is > 1, we need to insert
2636                        more at the end of the loop.  */
2637                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2638
2639                     GET_BUFFER_SPACE (nbytes);
2640
2641                     /* Initialize lower bound of the `succeed_n', even
2642                        though it will be set during matching by its
2643                        attendant `set_number_at' (inserted next),
2644                        because `re_compile_fastmap' needs to know.
2645                        Jump to the `jump_n' we might insert below.  */
2646                     INSERT_JUMP2 (succeed_n, laststart,
2647                                   b + 5 + (upper_bound > 1) * 5,
2648                                   lower_bound);
2649                     b += 5;
2650
2651                     /* Code to initialize the lower bound.  Insert
2652                        before the `succeed_n'.  The `5' is the last two
2653                        bytes of this `set_number_at', plus 3 bytes of
2654                        the following `succeed_n'.  */
2655                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2656                     b += 5;
2657
2658                     if (upper_bound > 1)
2659                       { /* More than one repetition is allowed, so
2660                            append a backward jump to the `succeed_n'
2661                            that starts this interval.
2662
2663                            When we've reached this during matching,
2664                            we'll have matched the interval once, so
2665                            jump back only `upper_bound - 1' times.  */
2666                         STORE_JUMP2 (jump_n, b, laststart + 5,
2667                                      upper_bound - 1);
2668                         b += 5;
2669
2670                         /* The location we want to set is the second
2671                            parameter of the `jump_n'; that is `b-2' as
2672                            an absolute address.  `laststart' will be
2673                            the `set_number_at' we're about to insert;
2674                            `laststart+3' the number to set, the source
2675                            for the relative address.  But we are
2676                            inserting into the middle of the pattern --
2677                            so everything is getting moved up by 5.
2678                            Conclusion: (b - 2) - (laststart + 3) + 5,
2679                            i.e., b - laststart.
2680
2681                            We insert this at the beginning of the loop
2682                            so that if we fail during matching, we'll
2683                            reinitialize the bounds.  */
2684                         insert_op2 (set_number_at, laststart, b - laststart,
2685                                     upper_bound - 1, b);
2686                         b += 5;
2687                       }
2688                   }
2689                pending_exact = 0;
2690                beg_interval = NULL;
2691              }
2692              break;
2693
2694            unfetch_interval:
2695              /* If an invalid interval, match the characters as literals.  */
2696               assert (beg_interval);
2697               p = beg_interval;
2698               beg_interval = NULL;
2699
2700               /* normal_char and normal_backslash need `c'.  */
2701               PATFETCH (c);
2702
2703               if (!(syntax & RE_NO_BK_BRACES))
2704                 {
2705                   if (p > pattern  &&  p[-1] == '\\')
2706                     goto normal_backslash;
2707                 }
2708               goto normal_char;
2709
2710#ifdef emacs
2711            /* There is no way to specify the before_dot and after_dot
2712               operators.  rms says this is ok.  --karl  */
2713            case '=':
2714              BUF_PUSH (at_dot);
2715              break;
2716
2717            case 's':
2718              laststart = b;
2719              PATFETCH (c);
2720              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2721              break;
2722
2723            case 'S':
2724              laststart = b;
2725              PATFETCH (c);
2726              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2727              break;
2728#endif /* emacs */
2729
2730
2731            case 'w':
2732              if (syntax & RE_NO_GNU_OPS)
2733                goto normal_char;
2734              laststart = b;
2735              BUF_PUSH (wordchar);
2736              break;
2737
2738
2739            case 'W':
2740              if (syntax & RE_NO_GNU_OPS)
2741                goto normal_char;
2742              laststart = b;
2743              BUF_PUSH (notwordchar);
2744              break;
2745
2746
2747            case '<':
2748              if (syntax & RE_NO_GNU_OPS)
2749                goto normal_char;
2750              BUF_PUSH (wordbeg);
2751              break;
2752
2753            case '>':
2754              if (syntax & RE_NO_GNU_OPS)
2755                goto normal_char;
2756              BUF_PUSH (wordend);
2757              break;
2758
2759            case 'b':
2760              if (syntax & RE_NO_GNU_OPS)
2761                goto normal_char;
2762              BUF_PUSH (wordbound);
2763              break;
2764
2765            case 'B':
2766              if (syntax & RE_NO_GNU_OPS)
2767                goto normal_char;
2768              BUF_PUSH (notwordbound);
2769              break;
2770
2771            case '`':
2772              if (syntax & RE_NO_GNU_OPS)
2773                goto normal_char;
2774              BUF_PUSH (begbuf);
2775              break;
2776
2777            case '\'':
2778              if (syntax & RE_NO_GNU_OPS)
2779                goto normal_char;
2780              BUF_PUSH (endbuf);
2781              break;
2782
2783            case '1': case '2': case '3': case '4': case '5':
2784            case '6': case '7': case '8': case '9':
2785              if (syntax & RE_NO_BK_REFS)
2786                goto normal_char;
2787
2788              c1 = c - '0';
2789
2790              if (c1 > regnum)
2791                FREE_STACK_RETURN (REG_ESUBREG);
2792
2793              /* Can't back reference to a subexpression if inside of it.  */
2794              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2795                goto normal_char;
2796
2797              laststart = b;
2798              BUF_PUSH_2 (duplicate, c1);
2799              break;
2800
2801
2802            case '+':
2803            case '?':
2804              if (syntax & RE_BK_PLUS_QM)
2805                goto handle_plus;
2806              else
2807                goto normal_backslash;
2808
2809            default:
2810            normal_backslash:
2811              /* You might think it would be useful for \ to mean
2812                 not to translate; but if we don't translate it
2813                 it will never match anything.  */
2814              c = TRANSLATE (c);
2815              goto normal_char;
2816            }
2817          break;
2818
2819
2820        default:
2821        /* Expects the character in `c'.  */
2822        normal_char:
2823              /* If no exactn currently being built.  */
2824          if (!pending_exact
2825
2826              /* If last exactn not at current position.  */
2827              || pending_exact + *pending_exact + 1 != b
2828
2829              /* We have only one byte following the exactn for the count.  */
2830              || *pending_exact == (1 << BYTEWIDTH) - 1
2831
2832              /* If followed by a repetition operator.  */
2833              || *p == '*' || *p == '^'
2834              || ((syntax & RE_BK_PLUS_QM)
2835                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2836                  : (*p == '+' || *p == '?'))
2837              || ((syntax & RE_INTERVALS)
2838                  && ((syntax & RE_NO_BK_BRACES)
2839                      ? *p == '{'
2840                      : (p[0] == '\\' && p[1] == '{'))))
2841            {
2842              /* Start building a new exactn.  */
2843
2844              laststart = b;
2845
2846              BUF_PUSH_2 (exactn, 0);
2847              pending_exact = b - 1;
2848            }
2849
2850          BUF_PUSH (c);
2851          (*pending_exact)++;
2852          break;
2853        } /* switch (c) */
2854    } /* while p != pend */
2855
2856
2857  /* Through the pattern now.  */
2858
2859  if (fixup_alt_jump)
2860    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2861
2862  if (!COMPILE_STACK_EMPTY)
2863    FREE_STACK_RETURN (REG_EPAREN);
2864
2865  /* If we don't want backtracking, force success
2866     the first time we reach the end of the compiled pattern.  */
2867  if (syntax & RE_NO_POSIX_BACKTRACKING)
2868    BUF_PUSH (succeed);
2869
2870  free (compile_stack.stack);
2871
2872  /* We have succeeded; set the length of the buffer.  */
2873  bufp->used = b - bufp->buffer;
2874
2875#ifdef DEBUG
2876  if (debug)
2877    {
2878      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2879      print_compiled_pattern (bufp);
2880    }
2881#endif /* DEBUG */
2882
2883#ifndef MATCH_MAY_ALLOCATE
2884  /* Initialize the failure stack to the largest possible stack.  This
2885     isn't necessary unless we're trying to avoid calling alloca in
2886     the search and match routines.  */
2887  {
2888    int num_regs = bufp->re_nsub + 1;
2889
2890    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2891       is strictly greater than re_max_failures, the largest possible stack
2892       is 2 * re_max_failures failure points.  */
2893    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2894      {
2895        fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2896
2897# ifdef emacs
2898        if (! fail_stack.stack)
2899          fail_stack.stack
2900            = (fail_stack_elt_t *) xmalloc (fail_stack.size
2901                                            * sizeof (fail_stack_elt_t));
2902        else
2903          fail_stack.stack
2904            = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2905                                             (fail_stack.size
2906                                              * sizeof (fail_stack_elt_t)));
2907# else /* not emacs */
2908        if (! fail_stack.stack)
2909          fail_stack.stack
2910            = (fail_stack_elt_t *) malloc (fail_stack.size
2911                                           * sizeof (fail_stack_elt_t));
2912        else
2913          fail_stack.stack
2914            = (fail_stack_elt_t *) realloc (fail_stack.stack,
2915                                            (fail_stack.size
2916                                             * sizeof (fail_stack_elt_t)));
2917# endif /* not emacs */
2918      }
2919
2920    regex_grow_registers (num_regs);
2921  }
2922#endif /* not MATCH_MAY_ALLOCATE */
2923
2924  return REG_NOERROR;
2925} /* regex_compile */
2926
2927/* Subroutines for `regex_compile'.  */
2928
2929/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2930
2931static void
2932store_op1 (op, loc, arg)
2933    re_opcode_t op;
2934    unsigned char *loc;
2935    int arg;
2936{
2937  *loc = (unsigned char) op;
2938  STORE_NUMBER (loc + 1, arg);
2939}
2940
2941
2942/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2943
2944static void
2945store_op2 (op, loc, arg1, arg2)
2946    re_opcode_t op;
2947    unsigned char *loc;
2948    int arg1, arg2;
2949{
2950  *loc = (unsigned char) op;
2951  STORE_NUMBER (loc + 1, arg1);
2952  STORE_NUMBER (loc + 3, arg2);
2953}
2954
2955
2956/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2957   for OP followed by two-byte integer parameter ARG.  */
2958
2959static void
2960insert_op1 (op, loc, arg, end)
2961    re_opcode_t op;
2962    unsigned char *loc;
2963    int arg;
2964    unsigned char *end;
2965{
2966  register unsigned char *pfrom = end;
2967  register unsigned char *pto = end + 3;
2968
2969  while (pfrom != loc)
2970    *--pto = *--pfrom;
2971
2972  store_op1 (op, loc, arg);
2973}
2974
2975
2976/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2977
2978static void
2979insert_op2 (op, loc, arg1, arg2, end)
2980    re_opcode_t op;
2981    unsigned char *loc;
2982    int arg1, arg2;
2983    unsigned char *end;
2984{
2985  register unsigned char *pfrom = end;
2986  register unsigned char *pto = end + 5;
2987
2988  while (pfrom != loc)
2989    *--pto = *--pfrom;
2990
2991  store_op2 (op, loc, arg1, arg2);
2992}
2993
2994
2995/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2996   after an alternative or a begin-subexpression.  We assume there is at
2997   least one character before the ^.  */
2998
2999static boolean
3000at_begline_loc_p (pattern, p, syntax)
3001    const char *pattern, *p;
3002    reg_syntax_t syntax;
3003{
3004  const char *prev = p - 2;
3005  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3006
3007  return
3008       /* After a subexpression?  */
3009       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3010       /* After an alternative?  */
3011    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3012}
3013
3014
3015/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3016   at least one character after the $, i.e., `P < PEND'.  */
3017
3018static boolean
3019at_endline_loc_p (p, pend, syntax)
3020    const char *p, *pend;
3021    reg_syntax_t syntax;
3022{
3023  const char *next = p;
3024  boolean next_backslash = *next == '\\';
3025  const char *next_next = p + 1 < pend ? p + 1 : 0;
3026
3027  return
3028       /* Before a subexpression?  */
3029       (syntax & RE_NO_BK_PARENS ? *next == ')'
3030        : next_backslash && next_next && *next_next == ')')
3031       /* Before an alternative?  */
3032    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3033        : next_backslash && next_next && *next_next == '|');
3034}
3035
3036
3037/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3038   false if it's not.  */
3039
3040static boolean
3041group_in_compile_stack (compile_stack, regnum)
3042    compile_stack_type compile_stack;
3043    regnum_t regnum;
3044{
3045  int this_element;
3046
3047  for (this_element = compile_stack.avail - 1;
3048       this_element >= 0;
3049       this_element--)
3050    if (compile_stack.stack[this_element].regnum == regnum)
3051      return true;
3052
3053  return false;
3054}
3055
3056
3057/* Read the ending character of a range (in a bracket expression) from the
3058   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3059   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3060   Then we set the translation of all bits between the starting and
3061   ending characters (inclusive) in the compiled pattern B.
3062
3063   Return an error code.
3064
3065   We use these short variable names so we can use the same macros as
3066   `regex_compile' itself.  */
3067
3068static reg_errcode_t
3069compile_range (p_ptr, pend, translate, syntax, b)
3070    const char **p_ptr, *pend;
3071    RE_TRANSLATE_TYPE translate;
3072    reg_syntax_t syntax;
3073    unsigned char *b;
3074{
3075  unsigned this_char;
3076
3077  const char *p = *p_ptr;
3078  unsigned int range_start, range_end;
3079
3080  if (p == pend)
3081    return REG_ERANGE;
3082
3083  /* Even though the pattern is a signed `char *', we need to fetch
3084     with unsigned char *'s; if the high bit of the pattern character
3085     is set, the range endpoints will be negative if we fetch using a
3086     signed char *.
3087
3088     We also want to fetch the endpoints without translating them; the
3089     appropriate translation is done in the bit-setting loop below.  */
3090  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3091  range_start = ((const unsigned char *) p)[-2];
3092  range_end   = ((const unsigned char *) p)[0];
3093
3094  /* Have to increment the pointer into the pattern string, so the
3095     caller isn't still at the ending character.  */
3096  (*p_ptr)++;
3097
3098  /* If the start is after the end, the range is empty.  */
3099  if (range_start > range_end)
3100    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3101
3102  /* Here we see why `this_char' has to be larger than an `unsigned
3103     char' -- the range is inclusive, so if `range_end' == 0xff
3104     (assuming 8-bit characters), we would otherwise go into an infinite
3105     loop, since all characters <= 0xff.  */
3106  for (this_char = range_start; this_char <= range_end; this_char++)
3107    {
3108      SET_LIST_BIT (TRANSLATE (this_char));
3109    }
3110
3111  return REG_NOERROR;
3112}
3113
3114/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3115   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3116   characters can start a string that matches the pattern.  This fastmap
3117   is used by re_search to skip quickly over impossible starting points.
3118
3119   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3120   area as BUFP->fastmap.
3121
3122   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3123   the pattern buffer.
3124
3125   Returns 0 if we succeed, -2 if an internal error.   */
3126
3127int
3128re_compile_fastmap (bufp)
3129     struct re_pattern_buffer *bufp;
3130{
3131  int j, k;
3132#ifdef MATCH_MAY_ALLOCATE
3133  fail_stack_type fail_stack;
3134#endif
3135#ifndef REGEX_MALLOC
3136  char *destination;
3137#endif
3138
3139  register char *fastmap = bufp->fastmap;
3140  unsigned char *pattern = bufp->buffer;
3141  unsigned char *p = pattern;
3142  register unsigned char *pend = pattern + bufp->used;
3143
3144#ifdef REL_ALLOC
3145  /* This holds the pointer to the failure stack, when
3146     it is allocated relocatably.  */
3147  fail_stack_elt_t *failure_stack_ptr;
3148#endif
3149
3150  /* Assume that each path through the pattern can be null until
3151     proven otherwise.  We set this false at the bottom of switch
3152     statement, to which we get only if a particular path doesn't
3153     match the empty string.  */
3154  boolean path_can_be_null = true;
3155
3156  /* We aren't doing a `succeed_n' to begin with.  */
3157  boolean succeed_n_p = false;
3158
3159  assert (fastmap != NULL && p != NULL);
3160
3161  INIT_FAIL_STACK ();
3162  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3163  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
3164  bufp->can_be_null = 0;
3165
3166  while (1)
3167    {
3168      if (p == pend || *p == succeed)
3169        {
3170          /* We have reached the (effective) end of pattern.  */
3171          if (!FAIL_STACK_EMPTY ())
3172            {
3173              bufp->can_be_null |= path_can_be_null;
3174
3175              /* Reset for next path.  */
3176              path_can_be_null = true;
3177
3178              p = fail_stack.stack[--fail_stack.avail].pointer;
3179
3180              continue;
3181            }
3182          else
3183            break;
3184        }
3185
3186      /* We should never be about to go beyond the end of the pattern.  */
3187      assert (p < pend);
3188
3189      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3190        {
3191
3192        /* I guess the idea here is to simply not bother with a fastmap
3193           if a backreference is used, since it's too hard to figure out
3194           the fastmap for the corresponding group.  Setting
3195           `can_be_null' stops `re_search_2' from using the fastmap, so
3196           that is all we do.  */
3197        case duplicate:
3198          bufp->can_be_null = 1;
3199          goto done;
3200
3201
3202      /* Following are the cases which match a character.  These end
3203         with `break'.  */
3204
3205        case exactn:
3206          fastmap[p[1]] = 1;
3207          break;
3208
3209
3210        case charset:
3211          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3212            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3213              fastmap[j] = 1;
3214          break;
3215
3216
3217        case charset_not:
3218          /* Chars beyond end of map must be allowed.  */
3219          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3220            fastmap[j] = 1;
3221
3222          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3223            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3224              fastmap[j] = 1;
3225          break;
3226
3227
3228        case wordchar:
3229          for (j = 0; j < (1 << BYTEWIDTH); j++)
3230            if (SYNTAX (j) == Sword)
3231              fastmap[j] = 1;
3232          break;
3233
3234
3235        case notwordchar:
3236          for (j = 0; j < (1 << BYTEWIDTH); j++)
3237            if (SYNTAX (j) != Sword)
3238              fastmap[j] = 1;
3239          break;
3240
3241
3242        case anychar:
3243          {
3244            int fastmap_newline = fastmap['\n'];
3245
3246            /* `.' matches anything ...  */
3247            for (j = 0; j < (1 << BYTEWIDTH); j++)
3248              fastmap[j] = 1;
3249
3250            /* ... except perhaps newline.  */
3251            if (!(bufp->syntax & RE_DOT_NEWLINE))
3252              fastmap['\n'] = fastmap_newline;
3253
3254            /* Return if we have already set `can_be_null'; if we have,
3255               then the fastmap is irrelevant.  Something's wrong here.  */
3256            else if (bufp->can_be_null)
3257              goto done;
3258
3259            /* Otherwise, have to check alternative paths.  */
3260            break;
3261          }
3262
3263#ifdef emacs
3264        case syntaxspec:
3265          k = *p++;
3266          for (j = 0; j < (1 << BYTEWIDTH); j++)
3267            if (SYNTAX (j) == (enum syntaxcode) k)
3268              fastmap[j] = 1;
3269          break;
3270
3271
3272        case notsyntaxspec:
3273          k = *p++;
3274          for (j = 0; j < (1 << BYTEWIDTH); j++)
3275            if (SYNTAX (j) != (enum syntaxcode) k)
3276              fastmap[j] = 1;
3277          break;
3278
3279
3280      /* All cases after this match the empty string.  These end with
3281         `continue'.  */
3282
3283
3284        case before_dot:
3285        case at_dot:
3286        case after_dot:
3287          continue;
3288#endif /* emacs */
3289
3290
3291        case no_op:
3292        case begline:
3293        case endline:
3294        case begbuf:
3295        case endbuf:
3296        case wordbound:
3297        case notwordbound:
3298        case wordbeg:
3299        case wordend:
3300        case push_dummy_failure:
3301          continue;
3302
3303
3304        case jump_n:
3305        case pop_failure_jump:
3306        case maybe_pop_jump:
3307        case jump:
3308        case jump_past_alt:
3309        case dummy_failure_jump:
3310          EXTRACT_NUMBER_AND_INCR (j, p);
3311          p += j;
3312          if (j > 0)
3313            continue;
3314
3315          /* Jump backward implies we just went through the body of a
3316             loop and matched nothing.  Opcode jumped to should be
3317             `on_failure_jump' or `succeed_n'.  Just treat it like an
3318             ordinary jump.  For a * loop, it has pushed its failure
3319             point already; if so, discard that as redundant.  */
3320          if ((re_opcode_t) *p != on_failure_jump
3321              && (re_opcode_t) *p != succeed_n)
3322            continue;
3323
3324          p++;
3325          EXTRACT_NUMBER_AND_INCR (j, p);
3326          p += j;
3327
3328          /* If what's on the stack is where we are now, pop it.  */
3329          if (!FAIL_STACK_EMPTY ()
3330              && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3331            fail_stack.avail--;
3332
3333          continue;
3334
3335
3336        case on_failure_jump:
3337        case on_failure_keep_string_jump:
3338        handle_on_failure_jump:
3339          EXTRACT_NUMBER_AND_INCR (j, p);
3340
3341          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3342             end of the pattern.  We don't want to push such a point,
3343             since when we restore it above, entering the switch will
3344             increment `p' past the end of the pattern.  We don't need
3345             to push such a point since we obviously won't find any more
3346             fastmap entries beyond `pend'.  Such a pattern can match
3347             the null string, though.  */
3348          if (p + j < pend)
3349            {
3350              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3351                {
3352                  RESET_FAIL_STACK ();
3353                  return -2;
3354                }
3355            }
3356          else
3357            bufp->can_be_null = 1;
3358
3359          if (succeed_n_p)
3360            {
3361              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
3362              succeed_n_p = false;
3363            }
3364
3365          continue;
3366
3367
3368        case succeed_n:
3369          /* Get to the number of times to succeed.  */
3370          p += 2;
3371
3372          /* Increment p past the n for when k != 0.  */
3373          EXTRACT_NUMBER_AND_INCR (k, p);
3374          if (k == 0)
3375            {
3376              p -= 4;
3377              succeed_n_p = true;  /* Spaghetti code alert.  */
3378              goto handle_on_failure_jump;
3379            }
3380          continue;
3381
3382
3383        case set_number_at:
3384          p += 4;
3385          continue;
3386
3387
3388        case start_memory:
3389        case stop_memory:
3390          p += 2;
3391          continue;
3392
3393
3394        default:
3395          abort (); /* We have listed all the cases.  */
3396        } /* switch *p++ */
3397
3398      /* Getting here means we have found the possible starting
3399         characters for one path of the pattern -- and that the empty
3400         string does not match.  We need not follow this path further.
3401         Instead, look at the next alternative (remembered on the
3402         stack), or quit if no more.  The test at the top of the loop
3403         does these things.  */
3404      path_can_be_null = false;
3405      p = pend;
3406    } /* while p */
3407
3408  /* Set `can_be_null' for the last path (also the first path, if the
3409     pattern is empty).  */
3410  bufp->can_be_null |= path_can_be_null;
3411
3412 done:
3413  RESET_FAIL_STACK ();
3414  return 0;
3415} /* re_compile_fastmap */
3416#ifdef _LIBC
3417weak_alias (__re_compile_fastmap, re_compile_fastmap)
3418#endif
3419
3420/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3421   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3422   this memory for recording register information.  STARTS and ENDS
3423   must be allocated using the malloc library routine, and must each
3424   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3425
3426   If NUM_REGS == 0, then subsequent matches should allocate their own
3427   register data.
3428
3429   Unless this function is called, the first search or match using
3430   PATTERN_BUFFER will allocate its own register data, without
3431   freeing the old data.  */
3432
3433void
3434re_set_registers (bufp, regs, num_regs, starts, ends)
3435    struct re_pattern_buffer *bufp;
3436    struct re_registers *regs;
3437    unsigned num_regs;
3438    regoff_t *starts, *ends;
3439{
3440  if (num_regs)
3441    {
3442      bufp->regs_allocated = REGS_REALLOCATE;
3443      regs->num_regs = num_regs;
3444      regs->start = starts;
3445      regs->end = ends;
3446    }
3447  else
3448    {
3449      bufp->regs_allocated = REGS_UNALLOCATED;
3450      regs->num_regs = 0;
3451      regs->start = regs->end = (regoff_t *) 0;
3452    }
3453}
3454#ifdef _LIBC
3455weak_alias (__re_set_registers, re_set_registers)
3456#endif
3457
3458/* Searching routines.  */
3459
3460/* Like re_search_2, below, but only one string is specified, and
3461   doesn't let you say where to stop matching. */
3462
3463int
3464re_search (bufp, string, size, startpos, range, regs)
3465     struct re_pattern_buffer *bufp;
3466     const char *string;
3467     int size, startpos, range;
3468     struct re_registers *regs;
3469{
3470  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3471                      regs, size);
3472}
3473#ifdef _LIBC
3474weak_alias (__re_search, re_search)
3475#endif
3476
3477
3478/* Using the compiled pattern in BUFP->buffer, first tries to match the
3479   virtual concatenation of STRING1 and STRING2, starting first at index
3480   STARTPOS, then at STARTPOS + 1, and so on.
3481
3482   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3483
3484   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3485   only at STARTPOS; in general, the last start tried is STARTPOS +
3486   RANGE.
3487
3488   In REGS, return the indices of the virtual concatenation of STRING1
3489   and STRING2 that matched the entire BUFP->buffer and its contained
3490   subexpressions.
3491
3492   Do not consider matching one past the index STOP in the virtual
3493   concatenation of STRING1 and STRING2.
3494
3495   We return either the position in the strings at which the match was
3496   found, -1 if no match, or -2 if error (such as failure
3497   stack overflow).  */
3498
3499int
3500re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3501     struct re_pattern_buffer *bufp;
3502     const char *string1, *string2;
3503     int size1, size2;
3504     int startpos;
3505     int range;
3506     struct re_registers *regs;
3507     int stop;
3508{
3509  int val;
3510  register char *fastmap = bufp->fastmap;
3511  register RE_TRANSLATE_TYPE translate = bufp->translate;
3512  int total_size = size1 + size2;
3513  int endpos = startpos + range;
3514
3515  /* Check for out-of-range STARTPOS.  */
3516  if (startpos < 0 || startpos > total_size)
3517    return -1;
3518
3519  /* Fix up RANGE if it might eventually take us outside
3520     the virtual concatenation of STRING1 and STRING2.
3521     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3522  if (endpos < 0)
3523    range = 0 - startpos;
3524  else if (endpos > total_size)
3525    range = total_size - startpos;
3526
3527  /* If the search isn't to be a backwards one, don't waste time in a
3528     search for a pattern that must be anchored.  */
3529  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3530    {
3531      if (startpos > 0)
3532        return -1;
3533      else
3534        range = 1;
3535    }
3536
3537#ifdef emacs
3538  /* In a forward search for something that starts with \=.
3539     don't keep searching past point.  */
3540  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3541    {
3542      range = PT - startpos;
3543      if (range <= 0)
3544        return -1;
3545    }
3546#endif /* emacs */
3547
3548  /* Update the fastmap now if not correct already.  */
3549  if (fastmap && !bufp->fastmap_accurate)
3550    if (re_compile_fastmap (bufp) == -2)
3551      return -2;
3552
3553  /* Loop through the string, looking for a place to start matching.  */
3554  for (;;)
3555    {
3556      /* If a fastmap is supplied, skip quickly over characters that
3557         cannot be the start of a match.  If the pattern can match the
3558         null string, however, we don't need to skip characters; we want
3559         the first null string.  */
3560      if (fastmap && startpos < total_size && !bufp->can_be_null)
3561        {
3562          if (range > 0)        /* Searching forwards.  */
3563            {
3564              register const char *d;
3565              register int lim = 0;
3566              int irange = range;
3567
3568              if (startpos < size1 && startpos + range >= size1)
3569                lim = range - (size1 - startpos);
3570
3571              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3572
3573              /* Written out as an if-else to avoid testing `translate'
3574                 inside the loop.  */
3575              if (translate)
3576                while (range > lim
3577                       && !fastmap[(unsigned char)
3578                                   translate[(unsigned char) *d++]])
3579                  range--;
3580              else
3581                while (range > lim && !fastmap[(unsigned char) *d++])
3582                  range--;
3583
3584              startpos += irange - range;
3585            }
3586          else                          /* Searching backwards.  */
3587            {
3588              register char c = (size1 == 0 || startpos >= size1
3589                                 ? string2[startpos - size1]
3590                                 : string1[startpos]);
3591
3592              if (!fastmap[(unsigned char) TRANSLATE (c)])
3593                goto advance;
3594            }
3595        }
3596
3597      /* If can't match the null string, and that's all we have left, fail.  */
3598      if (range >= 0 && startpos == total_size && fastmap
3599          && !bufp->can_be_null)
3600        return -1;
3601
3602      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3603                                 startpos, regs, stop);
3604#ifndef REGEX_MALLOC
3605# ifdef C_ALLOCA
3606      alloca (0);
3607# endif
3608#endif
3609
3610      if (val >= 0)
3611        return startpos;
3612
3613      if (val == -2)
3614        return -2;
3615
3616    advance:
3617      if (!range)
3618        break;
3619      else if (range > 0)
3620        {
3621          range--;
3622          startpos++;
3623        }
3624      else
3625        {
3626          range++;
3627          startpos--;
3628        }
3629    }
3630  return -1;
3631} /* re_search_2 */
3632#ifdef _LIBC
3633weak_alias (__re_search_2, re_search_2)
3634#endif
3635
3636/* This converts PTR, a pointer into one of the search strings `string1'
3637   and `string2' into an offset from the beginning of that string.  */
3638#define POINTER_TO_OFFSET(ptr)                  \
3639  (FIRST_STRING_P (ptr)                         \
3640   ? ((regoff_t) ((ptr) - string1))             \
3641   : ((regoff_t) ((ptr) - string2 + size1)))
3642
3643/* Macros for dealing with the split strings in re_match_2.  */
3644
3645#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3646
3647/* Call before fetching a character with *d.  This switches over to
3648   string2 if necessary.  */
3649#define PREFETCH()                                                      \
3650  while (d == dend)                                                     \
3651    {                                                                   \
3652      /* End of string2 => fail.  */                                    \
3653      if (dend == end_match_2)                                          \
3654        goto fail;                                                      \
3655      /* End of string1 => advance to string2.  */                      \
3656      d = string2;                                                      \
3657      dend = end_match_2;                                               \
3658    }
3659
3660
3661/* Test if at very beginning or at very end of the virtual concatenation
3662   of `string1' and `string2'.  If only one string, it's `string2'.  */
3663#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3664#define AT_STRINGS_END(d) ((d) == end2)
3665
3666
3667/* Test if D points to a character which is word-constituent.  We have
3668   two special cases to check for: if past the end of string1, look at
3669   the first character in string2; and if before the beginning of
3670   string2, look at the last character in string1.  */
3671#define WORDCHAR_P(d)                                                   \
3672  (SYNTAX ((d) == end1 ? *string2                                       \
3673           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3674   == Sword)
3675
3676/* Disabled due to a compiler bug -- see comment at case wordbound */
3677#if 0
3678/* Test if the character before D and the one at D differ with respect
3679   to being word-constituent.  */
3680#define AT_WORD_BOUNDARY(d)                                             \
3681  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3682   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3683#endif
3684
3685/* Free everything we malloc.  */
3686#ifdef MATCH_MAY_ALLOCATE
3687# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3688# define FREE_VARIABLES()                                               \
3689  do {                                                                  \
3690    REGEX_FREE_STACK (fail_stack.stack);                                \
3691    FREE_VAR (regstart);                                                \
3692    FREE_VAR (regend);                                                  \
3693    FREE_VAR (old_regstart);                                            \
3694    FREE_VAR (old_regend);                                              \
3695    FREE_VAR (best_regstart);                                           \
3696    FREE_VAR (best_regend);                                             \
3697    FREE_VAR (reg_info);                                                \
3698    FREE_VAR (reg_dummy);                                               \
3699    FREE_VAR (reg_info_dummy);                                          \
3700  } while (0)
3701#else
3702# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3703#endif /* not MATCH_MAY_ALLOCATE */
3704
3705/* These values must meet several constraints.  They must not be valid
3706   register values; since we have a limit of 255 registers (because
3707   we use only one byte in the pattern for the register number), we can
3708   use numbers larger than 255.  They must differ by 1, because of
3709   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3710   be larger than the value for the highest register, so we do not try
3711   to actually save any registers when none are active.  */
3712#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3713#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3714
3715/* Matching routines.  */
3716
3717#ifndef emacs   /* Emacs never uses this.  */
3718/* re_match is like re_match_2 except it takes only a single string.  */
3719
3720int
3721re_match (bufp, string, size, pos, regs)
3722     struct re_pattern_buffer *bufp;
3723     const char *string;
3724     int size, pos;
3725     struct re_registers *regs;
3726{
3727  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3728                                    pos, regs, size);
3729# ifndef REGEX_MALLOC
3730#  ifdef C_ALLOCA
3731  alloca (0);
3732#  endif
3733# endif
3734  return result;
3735}
3736# ifdef _LIBC
3737weak_alias (__re_match, re_match)
3738# endif
3739#endif /* not emacs */
3740
3741static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3742                                                    unsigned char *end,
3743                                                register_info_type *reg_info));
3744static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3745                                                  unsigned char *end,
3746                                                register_info_type *reg_info));
3747static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3748                                                        unsigned char *end,
3749                                                register_info_type *reg_info));
3750static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3751                                     int len, char *translate));
3752
3753/* re_match_2 matches the compiled pattern in BUFP against the
3754   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3755   and SIZE2, respectively).  We start matching at POS, and stop
3756   matching at STOP.
3757
3758   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3759   store offsets for the substring each group matched in REGS.  See the
3760   documentation for exactly how many groups we fill.
3761
3762   We return -1 if no match, -2 if an internal error (such as the
3763   failure stack overflowing).  Otherwise, we return the length of the
3764   matched substring.  */
3765
3766int
3767re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3768     struct re_pattern_buffer *bufp;
3769     const char *string1, *string2;
3770     int size1, size2;
3771     int pos;
3772     struct re_registers *regs;
3773     int stop;
3774{
3775  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3776                                    pos, regs, stop);
3777#ifndef REGEX_MALLOC
3778# ifdef C_ALLOCA
3779  alloca (0);
3780# endif
3781#endif
3782  return result;
3783}
3784#ifdef _LIBC
3785weak_alias (__re_match_2, re_match_2)
3786#endif
3787
3788/* This is a separate function so that we can force an alloca cleanup
3789   afterwards.  */
3790static int
3791re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3792     struct re_pattern_buffer *bufp;
3793     const char *string1, *string2;
3794     int size1, size2;
3795     int pos;
3796     struct re_registers *regs;
3797     int stop;
3798{
3799  /* General temporaries.  */
3800  int mcnt;
3801  unsigned char *p1;
3802
3803  /* Just past the end of the corresponding string.  */
3804  const char *end1, *end2;
3805
3806  /* Pointers into string1 and string2, just past the last characters in
3807     each to consider matching.  */
3808  const char *end_match_1, *end_match_2;
3809
3810  /* Where we are in the data, and the end of the current string.  */
3811  const char *d, *dend;
3812
3813  /* Where we are in the pattern, and the end of the pattern.  */
3814  unsigned char *p = bufp->buffer;
3815  register unsigned char *pend = p + bufp->used;
3816
3817  /* Mark the opcode just after a start_memory, so we can test for an
3818     empty subpattern when we get to the stop_memory.  */
3819  unsigned char *just_past_start_mem = 0;
3820
3821  /* We use this to map every character in the string.  */
3822  RE_TRANSLATE_TYPE translate = bufp->translate;
3823
3824  /* Failure point stack.  Each place that can handle a failure further
3825     down the line pushes a failure point on this stack.  It consists of
3826     restart, regend, and reg_info for all registers corresponding to
3827     the subexpressions we're currently inside, plus the number of such
3828     registers, and, finally, two char *'s.  The first char * is where
3829     to resume scanning the pattern; the second one is where to resume
3830     scanning the strings.  If the latter is zero, the failure point is
3831     a ``dummy''; if a failure happens and the failure point is a dummy,
3832     it gets discarded and the next next one is tried.  */
3833#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3834  fail_stack_type fail_stack;
3835#endif
3836#ifdef DEBUG
3837  static unsigned failure_id = 0;
3838  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3839#endif
3840
3841#ifdef REL_ALLOC
3842  /* This holds the pointer to the failure stack, when
3843     it is allocated relocatably.  */
3844  fail_stack_elt_t *failure_stack_ptr;
3845#endif
3846
3847  /* We fill all the registers internally, independent of what we
3848     return, for use in backreferences.  The number here includes
3849     an element for register zero.  */
3850  size_t num_regs = bufp->re_nsub + 1;
3851
3852  /* The currently active registers.  */
3853  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3854  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3855
3856  /* Information on the contents of registers. These are pointers into
3857     the input strings; they record just what was matched (on this
3858     attempt) by a subexpression part of the pattern, that is, the
3859     regnum-th regstart pointer points to where in the pattern we began
3860     matching and the regnum-th regend points to right after where we
3861     stopped matching the regnum-th subexpression.  (The zeroth register
3862     keeps track of what the whole pattern matches.)  */
3863#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3864  const char **regstart, **regend;
3865#endif
3866
3867  /* If a group that's operated upon by a repetition operator fails to
3868     match anything, then the register for its start will need to be
3869     restored because it will have been set to wherever in the string we
3870     are when we last see its open-group operator.  Similarly for a
3871     register's end.  */
3872#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3873  const char **old_regstart, **old_regend;
3874#endif
3875
3876  /* The is_active field of reg_info helps us keep track of which (possibly
3877     nested) subexpressions we are currently in. The matched_something
3878     field of reg_info[reg_num] helps us tell whether or not we have
3879     matched any of the pattern so far this time through the reg_num-th
3880     subexpression.  These two fields get reset each time through any
3881     loop their register is in.  */
3882#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3883  register_info_type *reg_info;
3884#endif
3885
3886  /* The following record the register info as found in the above
3887     variables when we find a match better than any we've seen before.
3888     This happens as we backtrack through the failure points, which in
3889     turn happens only if we have not yet matched the entire string. */
3890  unsigned best_regs_set = false;
3891#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3892  const char **best_regstart, **best_regend;
3893#endif
3894
3895  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3896     allocate space for that if we're not allocating space for anything
3897     else (see below).  Also, we never need info about register 0 for
3898     any of the other register vectors, and it seems rather a kludge to
3899     treat `best_regend' differently than the rest.  So we keep track of
3900     the end of the best match so far in a separate variable.  We
3901     initialize this to NULL so that when we backtrack the first time
3902     and need to test it, it's not garbage.  */
3903  const char *match_end = NULL;
3904
3905  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3906  int set_regs_matched_done = 0;
3907
3908  /* Used when we pop values we don't care about.  */
3909#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3910  const char **reg_dummy;
3911  register_info_type *reg_info_dummy;
3912#endif
3913
3914#ifdef DEBUG
3915  /* Counts the total number of registers pushed.  */
3916  unsigned num_regs_pushed = 0;
3917#endif
3918
3919  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3920
3921  INIT_FAIL_STACK ();
3922
3923#ifdef MATCH_MAY_ALLOCATE
3924  /* Do not bother to initialize all the register variables if there are
3925     no groups in the pattern, as it takes a fair amount of time.  If
3926     there are groups, we include space for register 0 (the whole
3927     pattern), even though we never use it, since it simplifies the
3928     array indexing.  We should fix this.  */
3929  if (bufp->re_nsub)
3930    {
3931      regstart = REGEX_TALLOC (num_regs, const char *);
3932      regend = REGEX_TALLOC (num_regs, const char *);
3933      old_regstart = REGEX_TALLOC (num_regs, const char *);
3934      old_regend = REGEX_TALLOC (num_regs, const char *);
3935      best_regstart = REGEX_TALLOC (num_regs, const char *);
3936      best_regend = REGEX_TALLOC (num_regs, const char *);
3937      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3938      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3939      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3940
3941      if (!(regstart && regend && old_regstart && old_regend && reg_info
3942            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3943        {
3944          FREE_VARIABLES ();
3945          return -2;
3946        }
3947    }
3948  else
3949    {
3950      /* We must initialize all our variables to NULL, so that
3951         `FREE_VARIABLES' doesn't try to free them.  */
3952      regstart = regend = old_regstart = old_regend = best_regstart
3953        = best_regend = reg_dummy = NULL;
3954      reg_info = reg_info_dummy = (register_info_type *) NULL;
3955    }
3956#endif /* MATCH_MAY_ALLOCATE */
3957
3958  /* The starting position is bogus.  */
3959  if (pos < 0 || pos > size1 + size2)
3960    {
3961      FREE_VARIABLES ();
3962      return -1;
3963    }
3964
3965  /* Initialize subexpression text positions to -1 to mark ones that no
3966     start_memory/stop_memory has been seen for. Also initialize the
3967     register information struct.  */
3968  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3969    {
3970      regstart[mcnt] = regend[mcnt]
3971        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3972
3973      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3974      IS_ACTIVE (reg_info[mcnt]) = 0;
3975      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3976      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3977    }
3978
3979  /* We move `string1' into `string2' if the latter's empty -- but not if
3980     `string1' is null.  */
3981  if (size2 == 0 && string1 != NULL)
3982    {
3983      string2 = string1;
3984      size2 = size1;
3985      string1 = 0;
3986      size1 = 0;
3987    }
3988  end1 = string1 + size1;
3989  end2 = string2 + size2;
3990
3991  /* Compute where to stop matching, within the two strings.  */
3992  if (stop <= size1)
3993    {
3994      end_match_1 = string1 + stop;
3995      end_match_2 = string2;
3996    }
3997  else
3998    {
3999      end_match_1 = end1;
4000      end_match_2 = string2 + stop - size1;
4001    }
4002
4003  /* `p' scans through the pattern as `d' scans through the data.
4004     `dend' is the end of the input string that `d' points within.  `d'
4005     is advanced into the following input string whenever necessary, but
4006     this happens before fetching; therefore, at the beginning of the
4007     loop, `d' can be pointing at the end of a string, but it cannot
4008     equal `string2'.  */
4009  if (size1 > 0 && pos <= size1)
4010    {
4011      d = string1 + pos;
4012      dend = end_match_1;
4013    }
4014  else
4015    {
4016      d = string2 + pos - size1;
4017      dend = end_match_2;
4018    }
4019
4020  DEBUG_PRINT1 ("The compiled pattern is:\n");
4021  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4022  DEBUG_PRINT1 ("The string to match is: `");
4023  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4024  DEBUG_PRINT1 ("'\n");
4025
4026  /* This loops over pattern commands.  It exits by returning from the
4027     function if the match is complete, or it drops through if the match
4028     fails at this starting point in the input data.  */
4029  for (;;)
4030    {
4031#ifdef _LIBC
4032      DEBUG_PRINT2 ("\n%p: ", p);
4033#else
4034      DEBUG_PRINT2 ("\n0x%x: ", p);
4035#endif
4036
4037      if (p == pend)
4038        { /* End of pattern means we might have succeeded.  */
4039          DEBUG_PRINT1 ("end of pattern ... ");
4040
4041          /* If we haven't matched the entire string, and we want the
4042             longest match, try backtracking.  */
4043          if (d != end_match_2)
4044            {
4045              /* 1 if this match ends in the same string (string1 or string2)
4046                 as the best previous match.  */
4047              boolean same_str_p = (FIRST_STRING_P (match_end)
4048                                    == MATCHING_IN_FIRST_STRING);
4049              /* 1 if this match is the best seen so far.  */
4050              boolean best_match_p;
4051
4052              /* AIX compiler got confused when this was combined
4053                 with the previous declaration.  */
4054              if (same_str_p)
4055                best_match_p = d > match_end;
4056              else
4057                best_match_p = !MATCHING_IN_FIRST_STRING;
4058
4059              DEBUG_PRINT1 ("backtracking.\n");
4060
4061              if (!FAIL_STACK_EMPTY ())
4062                { /* More failure points to try.  */
4063
4064                  /* If exceeds best match so far, save it.  */
4065                  if (!best_regs_set || best_match_p)
4066                    {
4067                      best_regs_set = true;
4068                      match_end = d;
4069
4070                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4071
4072                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4073                        {
4074                          best_regstart[mcnt] = regstart[mcnt];
4075                          best_regend[mcnt] = regend[mcnt];
4076                        }
4077                    }
4078                  goto fail;
4079                }
4080
4081              /* If no failure points, don't restore garbage.  And if
4082                 last match is real best match, don't restore second
4083                 best one. */
4084              else if (best_regs_set && !best_match_p)
4085                {
4086                restore_best_regs:
4087                  /* Restore best match.  It may happen that `dend ==
4088                     end_match_1' while the restored d is in string2.
4089                     For example, the pattern `x.*y.*z' against the
4090                     strings `x-' and `y-z-', if the two strings are
4091                     not consecutive in memory.  */
4092                  DEBUG_PRINT1 ("Restoring best registers.\n");
4093
4094                  d = match_end;
4095                  dend = ((d >= string1 && d <= end1)
4096                           ? end_match_1 : end_match_2);
4097
4098                  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4099                    {
4100                      regstart[mcnt] = best_regstart[mcnt];
4101                      regend[mcnt] = best_regend[mcnt];
4102                    }
4103                }
4104            } /* d != end_match_2 */
4105
4106        succeed_label:
4107          DEBUG_PRINT1 ("Accepting match.\n");
4108
4109          /* If caller wants register contents data back, do it.  */
4110          if (regs && !bufp->no_sub)
4111            {
4112              /* Have the register data arrays been allocated?  */
4113              if (bufp->regs_allocated == REGS_UNALLOCATED)
4114                { /* No.  So allocate them with malloc.  We need one
4115                     extra element beyond `num_regs' for the `-1' marker
4116                     GNU code uses.  */
4117                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4118                  regs->start = TALLOC (regs->num_regs, regoff_t);
4119                  regs->end = TALLOC (regs->num_regs, regoff_t);
4120                  if (regs->start == NULL || regs->end == NULL)
4121                    {
4122                      FREE_VARIABLES ();
4123                      return -2;
4124                    }
4125                  bufp->regs_allocated = REGS_REALLOCATE;
4126                }
4127              else if (bufp->regs_allocated == REGS_REALLOCATE)
4128                { /* Yes.  If we need more elements than were already
4129                     allocated, reallocate them.  If we need fewer, just
4130                     leave it alone.  */
4131                  if (regs->num_regs < num_regs + 1)
4132                    {
4133                      regs->num_regs = num_regs + 1;
4134                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4135                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4136                      if (regs->start == NULL || regs->end == NULL)
4137                        {
4138                          FREE_VARIABLES ();
4139                          return -2;
4140                        }
4141                    }
4142                }
4143              else
4144                {
4145                  /* These braces fend off a "empty body in an else-statement"
4146                     warning under GCC when assert expands to nothing.  */
4147                  assert (bufp->regs_allocated == REGS_FIXED);
4148                }
4149
4150              /* Convert the pointer data in `regstart' and `regend' to
4151                 indices.  Register zero has to be set differently,
4152                 since we haven't kept track of any info for it.  */
4153              if (regs->num_regs > 0)
4154                {
4155                  regs->start[0] = pos;
4156                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4157                                  ? ((regoff_t) (d - string1))
4158                                  : ((regoff_t) (d - string2 + size1)));
4159                }
4160
4161              /* Go through the first `min (num_regs, regs->num_regs)'
4162                 registers, since that is all we initialized.  */
4163              for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4164                   mcnt++)
4165                {
4166                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4167                    regs->start[mcnt] = regs->end[mcnt] = -1;
4168                  else
4169                    {
4170                      regs->start[mcnt]
4171                        = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4172                      regs->end[mcnt]
4173                        = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4174                    }
4175                }
4176
4177              /* If the regs structure we return has more elements than
4178                 were in the pattern, set the extra elements to -1.  If
4179                 we (re)allocated the registers, this is the case,
4180                 because we always allocate enough to have at least one
4181                 -1 at the end.  */
4182              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4183                regs->start[mcnt] = regs->end[mcnt] = -1;
4184            } /* regs && !bufp->no_sub */
4185
4186          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4187                        nfailure_points_pushed, nfailure_points_popped,
4188                        nfailure_points_pushed - nfailure_points_popped);
4189          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4190
4191          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4192                            ? string1
4193                            : string2 - size1);
4194
4195          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4196
4197          FREE_VARIABLES ();
4198          return mcnt;
4199        }
4200
4201      /* Otherwise match next pattern command.  */
4202      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4203        {
4204        /* Ignore these.  Used to ignore the n of succeed_n's which
4205           currently have n == 0.  */
4206        case no_op:
4207          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4208          break;
4209
4210        case succeed:
4211          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4212          goto succeed_label;
4213
4214        /* Match the next n pattern characters exactly.  The following
4215           byte in the pattern defines n, and the n bytes after that
4216           are the characters to match.  */
4217        case exactn:
4218          mcnt = *p++;
4219          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4220
4221          /* This is written out as an if-else so we don't waste time
4222             testing `translate' inside the loop.  */
4223          if (translate)
4224            {
4225              do
4226                {
4227                  PREFETCH ();
4228                  if ((unsigned char) translate[(unsigned char) *d++]
4229                      != (unsigned char) *p++)
4230                    goto fail;
4231                }
4232              while (--mcnt);
4233            }
4234          else
4235            {
4236              do
4237                {
4238                  PREFETCH ();
4239                  if (*d++ != (char) *p++) goto fail;
4240                }
4241              while (--mcnt);
4242            }
4243          SET_REGS_MATCHED ();
4244          break;
4245
4246
4247        /* Match any character except possibly a newline or a null.  */
4248        case anychar:
4249          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4250
4251          PREFETCH ();
4252
4253          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4254              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4255            goto fail;
4256
4257          SET_REGS_MATCHED ();
4258          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4259          d++;
4260          break;
4261
4262
4263        case charset:
4264        case charset_not:
4265          {
4266            register unsigned char c;
4267            boolean not = (re_opcode_t) *(p - 1) == charset_not;
4268
4269            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4270
4271            PREFETCH ();
4272            c = TRANSLATE (*d); /* The character to match.  */
4273
4274            /* Cast to `unsigned' instead of `unsigned char' in case the
4275               bit list is a full 32 bytes long.  */
4276            if (c < (unsigned) (*p * BYTEWIDTH)
4277                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4278              not = !not;
4279
4280            p += 1 + *p;
4281
4282            if (!not) goto fail;
4283
4284            SET_REGS_MATCHED ();
4285            d++;
4286            break;
4287          }
4288
4289
4290        /* The beginning of a group is represented by start_memory.
4291           The arguments are the register number in the next byte, and the
4292           number of groups inner to this one in the next.  The text
4293           matched within the group is recorded (in the internal
4294           registers data structure) under the register number.  */
4295        case start_memory:
4296          DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4297
4298          /* Find out if this group can match the empty string.  */
4299          p1 = p;               /* To send to group_match_null_string_p.  */
4300
4301          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4302            REG_MATCH_NULL_STRING_P (reg_info[*p])
4303              = group_match_null_string_p (&p1, pend, reg_info);
4304
4305          /* Save the position in the string where we were the last time
4306             we were at this open-group operator in case the group is
4307             operated upon by a repetition operator, e.g., with `(a*)*b'
4308             against `ab'; then we want to ignore where we are now in
4309             the string in case this attempt to match fails.  */
4310          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4311                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4312                             : regstart[*p];
4313          DEBUG_PRINT2 ("  old_regstart: %d\n",
4314                         POINTER_TO_OFFSET (old_regstart[*p]));
4315
4316          regstart[*p] = d;
4317          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4318
4319          IS_ACTIVE (reg_info[*p]) = 1;
4320          MATCHED_SOMETHING (reg_info[*p]) = 0;
4321
4322          /* Clear this whenever we change the register activity status.  */
4323          set_regs_matched_done = 0;
4324
4325          /* This is the new highest active register.  */
4326          highest_active_reg = *p;
4327
4328          /* If nothing was active before, this is the new lowest active
4329             register.  */
4330          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4331            lowest_active_reg = *p;
4332
4333          /* Move past the register number and inner group count.  */
4334          p += 2;
4335          just_past_start_mem = p;
4336
4337          break;
4338
4339
4340        /* The stop_memory opcode represents the end of a group.  Its
4341           arguments are the same as start_memory's: the register
4342           number, and the number of inner groups.  */
4343        case stop_memory:
4344          DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4345
4346          /* We need to save the string position the last time we were at
4347             this close-group operator in case the group is operated
4348             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4349             against `aba'; then we want to ignore where we are now in
4350             the string in case this attempt to match fails.  */
4351          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4352                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4353                           : regend[*p];
4354          DEBUG_PRINT2 ("      old_regend: %d\n",
4355                         POINTER_TO_OFFSET (old_regend[*p]));
4356
4357          regend[*p] = d;
4358          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4359
4360          /* This register isn't active anymore.  */
4361          IS_ACTIVE (reg_info[*p]) = 0;
4362
4363          /* Clear this whenever we change the register activity status.  */
4364          set_regs_matched_done = 0;
4365
4366          /* If this was the only register active, nothing is active
4367             anymore.  */
4368          if (lowest_active_reg == highest_active_reg)
4369            {
4370              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4371              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4372            }
4373          else
4374            { /* We must scan for the new highest active register, since
4375                 it isn't necessarily one less than now: consider
4376                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4377                 new highest active register is 1.  */
4378              unsigned char r = *p - 1;
4379              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4380                r--;
4381
4382              /* If we end up at register zero, that means that we saved
4383                 the registers as the result of an `on_failure_jump', not
4384                 a `start_memory', and we jumped to past the innermost
4385                 `stop_memory'.  For example, in ((.)*) we save
4386                 registers 1 and 2 as a result of the *, but when we pop
4387                 back to the second ), we are at the stop_memory 1.
4388                 Thus, nothing is active.  */
4389              if (r == 0)
4390                {
4391                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4392                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4393                }
4394              else
4395                highest_active_reg = r;
4396            }
4397
4398          /* If just failed to match something this time around with a
4399             group that's operated on by a repetition operator, try to
4400             force exit from the ``loop'', and restore the register
4401             information for this group that we had before trying this
4402             last match.  */
4403          if ((!MATCHED_SOMETHING (reg_info[*p])
4404               || just_past_start_mem == p - 1)
4405              && (p + 2) < pend)
4406            {
4407              boolean is_a_jump_n = false;
4408
4409              p1 = p + 2;
4410              mcnt = 0;
4411              switch ((re_opcode_t) *p1++)
4412                {
4413                  case jump_n:
4414                    is_a_jump_n = true;
4415                  case pop_failure_jump:
4416                  case maybe_pop_jump:
4417                  case jump:
4418                  case dummy_failure_jump:
4419                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4420                    if (is_a_jump_n)
4421                      p1 += 2;
4422                    break;
4423
4424                  default:
4425                    /* do nothing */ ;
4426                }
4427              p1 += mcnt;
4428
4429              /* If the next operation is a jump backwards in the pattern
4430                 to an on_failure_jump right before the start_memory
4431                 corresponding to this stop_memory, exit from the loop
4432                 by forcing a failure after pushing on the stack the
4433                 on_failure_jump's jump in the pattern, and d.  */
4434              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4435                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4436                {
4437                  /* If this group ever matched anything, then restore
4438                     what its registers were before trying this last
4439                     failed match, e.g., with `(a*)*b' against `ab' for
4440                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4441                     against `aba' for regend[3].
4442
4443                     Also restore the registers for inner groups for,
4444                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4445                     otherwise get trashed).  */
4446
4447                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4448                    {
4449                      unsigned r;
4450
4451                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4452
4453                      /* Restore this and inner groups' (if any) registers.  */
4454                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4455                           r++)
4456                        {
4457                          regstart[r] = old_regstart[r];
4458
4459                          /* xx why this test?  */
4460                          if (old_regend[r] >= regstart[r])
4461                            regend[r] = old_regend[r];
4462                        }
4463                    }
4464                  p1++;
4465                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4466                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4467
4468                  goto fail;
4469                }
4470            }
4471
4472          /* Move past the register number and the inner group count.  */
4473          p += 2;
4474          break;
4475
4476
4477        /* \<digit> has been turned into a `duplicate' command which is
4478           followed by the numeric value of <digit> as the register number.  */
4479        case duplicate:
4480          {
4481            register const char *d2, *dend2;
4482            int regno = *p++;   /* Get which register to match against.  */
4483            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4484
4485            /* Can't back reference a group which we've never matched.  */
4486            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4487              goto fail;
4488
4489            /* Where in input to try to start matching.  */
4490            d2 = regstart[regno];
4491
4492            /* Where to stop matching; if both the place to start and
4493               the place to stop matching are in the same string, then
4494               set to the place to stop, otherwise, for now have to use
4495               the end of the first string.  */
4496
4497            dend2 = ((FIRST_STRING_P (regstart[regno])
4498                      == FIRST_STRING_P (regend[regno]))
4499                     ? regend[regno] : end_match_1);
4500            for (;;)
4501              {
4502                /* If necessary, advance to next segment in register
4503                   contents.  */
4504                while (d2 == dend2)
4505                  {
4506                    if (dend2 == end_match_2) break;
4507                    if (dend2 == regend[regno]) break;
4508
4509                    /* End of string1 => advance to string2. */
4510                    d2 = string2;
4511                    dend2 = regend[regno];
4512                  }
4513                /* At end of register contents => success */
4514                if (d2 == dend2) break;
4515
4516                /* If necessary, advance to next segment in data.  */
4517                PREFETCH ();
4518
4519                /* How many characters left in this segment to match.  */
4520                mcnt = dend - d;
4521
4522                /* Want how many consecutive characters we can match in
4523                   one shot, so, if necessary, adjust the count.  */
4524                if (mcnt > dend2 - d2)
4525                  mcnt = dend2 - d2;
4526
4527                /* Compare that many; failure if mismatch, else move
4528                   past them.  */
4529                if (translate
4530                    ? bcmp_translate (d, d2, mcnt, translate)
4531                    : memcmp (d, d2, mcnt))
4532                  goto fail;
4533                d += mcnt, d2 += mcnt;
4534
4535                /* Do this because we've match some characters.  */
4536                SET_REGS_MATCHED ();
4537              }
4538          }
4539          break;
4540
4541
4542        /* begline matches the empty string at the beginning of the string
4543           (unless `not_bol' is set in `bufp'), and, if
4544           `newline_anchor' is set, after newlines.  */
4545        case begline:
4546          DEBUG_PRINT1 ("EXECUTING begline.\n");
4547
4548          if (AT_STRINGS_BEG (d))
4549            {
4550              if (!bufp->not_bol) break;
4551            }
4552          else if (d[-1] == '\n' && bufp->newline_anchor)
4553            {
4554              break;
4555            }
4556          /* In all other cases, we fail.  */
4557          goto fail;
4558
4559
4560        /* endline is the dual of begline.  */
4561        case endline:
4562          DEBUG_PRINT1 ("EXECUTING endline.\n");
4563
4564          if (AT_STRINGS_END (d))
4565            {
4566              if (!bufp->not_eol) break;
4567            }
4568
4569          /* We have to ``prefetch'' the next character.  */
4570          else if ((d == end1 ? *string2 : *d) == '\n'
4571                   && bufp->newline_anchor)
4572            {
4573              break;
4574            }
4575          goto fail;
4576
4577
4578        /* Match at the very beginning of the data.  */
4579        case begbuf:
4580          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4581          if (AT_STRINGS_BEG (d))
4582            break;
4583          goto fail;
4584
4585
4586        /* Match at the very end of the data.  */
4587        case endbuf:
4588          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4589          if (AT_STRINGS_END (d))
4590            break;
4591          goto fail;
4592
4593
4594        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4595           pushes NULL as the value for the string on the stack.  Then
4596           `pop_failure_point' will keep the current value for the
4597           string, instead of restoring it.  To see why, consider
4598           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4599           then the . fails against the \n.  But the next thing we want
4600           to do is match the \n against the \n; if we restored the
4601           string value, we would be back at the foo.
4602
4603           Because this is used only in specific cases, we don't need to
4604           check all the things that `on_failure_jump' does, to make
4605           sure the right things get saved on the stack.  Hence we don't
4606           share its code.  The only reason to push anything on the
4607           stack at all is that otherwise we would have to change
4608           `anychar's code to do something besides goto fail in this
4609           case; that seems worse than this.  */
4610        case on_failure_keep_string_jump:
4611          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4612
4613          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4614#ifdef _LIBC
4615          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4616#else
4617          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4618#endif
4619
4620          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4621          break;
4622
4623
4624        /* Uses of on_failure_jump:
4625
4626           Each alternative starts with an on_failure_jump that points
4627           to the beginning of the next alternative.  Each alternative
4628           except the last ends with a jump that in effect jumps past
4629           the rest of the alternatives.  (They really jump to the
4630           ending jump of the following alternative, because tensioning
4631           these jumps is a hassle.)
4632
4633           Repeats start with an on_failure_jump that points past both
4634           the repetition text and either the following jump or
4635           pop_failure_jump back to this on_failure_jump.  */
4636        case on_failure_jump:
4637        on_failure:
4638          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4639
4640          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4641#ifdef _LIBC
4642          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4643#else
4644          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4645#endif
4646
4647          /* If this on_failure_jump comes right before a group (i.e.,
4648             the original * applied to a group), save the information
4649             for that group and all inner ones, so that if we fail back
4650             to this point, the group's information will be correct.
4651             For example, in \(a*\)*\1, we need the preceding group,
4652             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4653
4654          /* We can't use `p' to check ahead because we push
4655             a failure point to `p + mcnt' after we do this.  */
4656          p1 = p;
4657
4658          /* We need to skip no_op's before we look for the
4659             start_memory in case this on_failure_jump is happening as
4660             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4661             against aba.  */
4662          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4663            p1++;
4664
4665          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4666            {
4667              /* We have a new highest active register now.  This will
4668                 get reset at the start_memory we are about to get to,
4669                 but we will have saved all the registers relevant to
4670                 this repetition op, as described above.  */
4671              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4672              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4673                lowest_active_reg = *(p1 + 1);
4674            }
4675
4676          DEBUG_PRINT1 (":\n");
4677          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4678          break;
4679
4680
4681        /* A smart repeat ends with `maybe_pop_jump'.
4682           We change it to either `pop_failure_jump' or `jump'.  */
4683        case maybe_pop_jump:
4684          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4685          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4686          {
4687            register unsigned char *p2 = p;
4688
4689            /* Compare the beginning of the repeat with what in the
4690               pattern follows its end. If we can establish that there
4691               is nothing that they would both match, i.e., that we
4692               would have to backtrack because of (as in, e.g., `a*a')
4693               then we can change to pop_failure_jump, because we'll
4694               never have to backtrack.
4695
4696               This is not true in the case of alternatives: in
4697               `(a|ab)*' we do need to backtrack to the `ab' alternative
4698               (e.g., if the string was `ab').  But instead of trying to
4699               detect that here, the alternative has put on a dummy
4700               failure point which is what we will end up popping.  */
4701
4702            /* Skip over open/close-group commands.
4703               If what follows this loop is a ...+ construct,
4704               look at what begins its body, since we will have to
4705               match at least one of that.  */
4706            while (1)
4707              {
4708                if (p2 + 2 < pend
4709                    && ((re_opcode_t) *p2 == stop_memory
4710                        || (re_opcode_t) *p2 == start_memory))
4711                  p2 += 3;
4712                else if (p2 + 6 < pend
4713                         && (re_opcode_t) *p2 == dummy_failure_jump)
4714                  p2 += 6;
4715                else
4716                  break;
4717              }
4718
4719            p1 = p + mcnt;
4720            /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4721               to the `maybe_finalize_jump' of this case.  Examine what
4722               follows.  */
4723
4724            /* If we're at the end of the pattern, we can change.  */
4725            if (p2 == pend)
4726              {
4727                /* Consider what happens when matching ":\(.*\)"
4728                   against ":/".  I don't really understand this code
4729                   yet.  */
4730                p[-3] = (unsigned char) pop_failure_jump;
4731                DEBUG_PRINT1
4732                  ("  End of pattern: change to `pop_failure_jump'.\n");
4733              }
4734
4735            else if ((re_opcode_t) *p2 == exactn
4736                     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4737              {
4738                register unsigned char c
4739                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4740
4741                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4742                  {
4743                    p[-3] = (unsigned char) pop_failure_jump;
4744                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4745                                  c, p1[5]);
4746                  }
4747
4748                else if ((re_opcode_t) p1[3] == charset
4749                         || (re_opcode_t) p1[3] == charset_not)
4750                  {
4751                    int not = (re_opcode_t) p1[3] == charset_not;
4752
4753                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4754                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4755                      not = !not;
4756
4757                    /* `not' is equal to 1 if c would match, which means
4758                        that we can't change to pop_failure_jump.  */
4759                    if (!not)
4760                      {
4761                        p[-3] = (unsigned char) pop_failure_jump;
4762                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4763                      }
4764                  }
4765              }
4766            else if ((re_opcode_t) *p2 == charset)
4767              {
4768#ifdef DEBUG
4769                register unsigned char c
4770                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4771#endif
4772
4773#if 0
4774                if ((re_opcode_t) p1[3] == exactn
4775                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4776                          && (p2[2 + p1[5] / BYTEWIDTH]
4777                              & (1 << (p1[5] % BYTEWIDTH)))))
4778#else
4779                if ((re_opcode_t) p1[3] == exactn
4780                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4781                          && (p2[2 + p1[4] / BYTEWIDTH]
4782                              & (1 << (p1[4] % BYTEWIDTH)))))
4783#endif
4784                  {
4785                    p[-3] = (unsigned char) pop_failure_jump;
4786                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4787                                  c, p1[5]);
4788                  }
4789
4790                else if ((re_opcode_t) p1[3] == charset_not)
4791                  {
4792                    int idx;
4793                    /* We win if the charset_not inside the loop
4794                       lists every character listed in the charset after.  */
4795                    for (idx = 0; idx < (int) p2[1]; idx++)
4796                      if (! (p2[2 + idx] == 0
4797                             || (idx < (int) p1[4]
4798                                 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4799                        break;
4800
4801                    if (idx == p2[1])
4802                      {
4803                        p[-3] = (unsigned char) pop_failure_jump;
4804                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4805                      }
4806                  }
4807                else if ((re_opcode_t) p1[3] == charset)
4808                  {
4809                    int idx;
4810                    /* We win if the charset inside the loop
4811                       has no overlap with the one after the loop.  */
4812                    for (idx = 0;
4813                         idx < (int) p2[1] && idx < (int) p1[4];
4814                         idx++)
4815                      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4816                        break;
4817
4818                    if (idx == p2[1] || idx == p1[4])
4819                      {
4820                        p[-3] = (unsigned char) pop_failure_jump;
4821                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4822                      }
4823                  }
4824              }
4825          }
4826          p -= 2;               /* Point at relative address again.  */
4827          if ((re_opcode_t) p[-1] != pop_failure_jump)
4828            {
4829              p[-1] = (unsigned char) jump;
4830              DEBUG_PRINT1 ("  Match => jump.\n");
4831              goto unconditional_jump;
4832            }
4833        /* Note fall through.  */
4834
4835
4836        /* The end of a simple repeat has a pop_failure_jump back to
4837           its matching on_failure_jump, where the latter will push a
4838           failure point.  The pop_failure_jump takes off failure
4839           points put on by this pop_failure_jump's matching
4840           on_failure_jump; we got through the pattern to here from the
4841           matching on_failure_jump, so didn't fail.  */
4842        case pop_failure_jump:
4843          {
4844            /* We need to pass separate storage for the lowest and
4845               highest registers, even though we don't care about the
4846               actual values.  Otherwise, we will restore only one
4847               register from the stack, since lowest will == highest in
4848               `pop_failure_point'.  */
4849            active_reg_t dummy_low_reg, dummy_high_reg;
4850            unsigned char *pdummy;
4851            const char *sdummy;
4852
4853            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4854            POP_FAILURE_POINT (sdummy, pdummy,
4855                               dummy_low_reg, dummy_high_reg,
4856                               reg_dummy, reg_dummy, reg_info_dummy);
4857          }
4858          /* Note fall through.  */
4859
4860        unconditional_jump:
4861#ifdef _LIBC
4862          DEBUG_PRINT2 ("\n%p: ", p);
4863#else
4864          DEBUG_PRINT2 ("\n0x%x: ", p);
4865#endif
4866          /* Note fall through.  */
4867
4868        /* Unconditionally jump (without popping any failure points).  */
4869        case jump:
4870          EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4871          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4872          p += mcnt;                            /* Do the jump.  */
4873#ifdef _LIBC
4874          DEBUG_PRINT2 ("(to %p).\n", p);
4875#else
4876          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4877#endif
4878          break;
4879
4880
4881        /* We need this opcode so we can detect where alternatives end
4882           in `group_match_null_string_p' et al.  */
4883        case jump_past_alt:
4884          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4885          goto unconditional_jump;
4886
4887
4888        /* Normally, the on_failure_jump pushes a failure point, which
4889           then gets popped at pop_failure_jump.  We will end up at
4890           pop_failure_jump, also, and with a pattern of, say, `a+', we
4891           are skipping over the on_failure_jump, so we have to push
4892           something meaningless for pop_failure_jump to pop.  */
4893        case dummy_failure_jump:
4894          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4895          /* It doesn't matter what we push for the string here.  What
4896             the code at `fail' tests is the value for the pattern.  */
4897          PUSH_FAILURE_POINT (NULL, NULL, -2);
4898          goto unconditional_jump;
4899
4900
4901        /* At the end of an alternative, we need to push a dummy failure
4902           point in case we are followed by a `pop_failure_jump', because
4903           we don't want the failure point for the alternative to be
4904           popped.  For example, matching `(a|ab)*' against `aab'
4905           requires that we match the `ab' alternative.  */
4906        case push_dummy_failure:
4907          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4908          /* See comments just above at `dummy_failure_jump' about the
4909             two zeroes.  */
4910          PUSH_FAILURE_POINT (NULL, NULL, -2);
4911          break;
4912
4913        /* Have to succeed matching what follows at least n times.
4914           After that, handle like `on_failure_jump'.  */
4915        case succeed_n:
4916          EXTRACT_NUMBER (mcnt, p + 2);
4917          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4918
4919          assert (mcnt >= 0);
4920          /* Originally, this is how many times we HAVE to succeed.  */
4921          if (mcnt > 0)
4922            {
4923               mcnt--;
4924               p += 2;
4925               STORE_NUMBER_AND_INCR (p, mcnt);
4926#ifdef _LIBC
4927               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4928#else
4929               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4930#endif
4931            }
4932          else if (mcnt == 0)
4933            {
4934#ifdef _LIBC
4935              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4936#else
4937              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4938#endif
4939              p[2] = (unsigned char) no_op;
4940              p[3] = (unsigned char) no_op;
4941              goto on_failure;
4942            }
4943          break;
4944
4945        case jump_n:
4946          EXTRACT_NUMBER (mcnt, p + 2);
4947          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4948
4949          /* Originally, this is how many times we CAN jump.  */
4950          if (mcnt)
4951            {
4952               mcnt--;
4953               STORE_NUMBER (p + 2, mcnt);
4954#ifdef _LIBC
4955               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4956#else
4957               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4958#endif
4959               goto unconditional_jump;
4960            }
4961          /* If don't have to jump any more, skip over the rest of command.  */
4962          else
4963            p += 4;
4964          break;
4965
4966        case set_number_at:
4967          {
4968            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4969
4970            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4971            p1 = p + mcnt;
4972            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4973#ifdef _LIBC
4974            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4975#else
4976            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4977#endif
4978            STORE_NUMBER (p1, mcnt);
4979            break;
4980          }
4981
4982#if 0
4983        /* The DEC Alpha C compiler 3.x generates incorrect code for the
4984           test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4985           AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4986           macro and introducing temporary variables works around the bug.  */
4987
4988        case wordbound:
4989          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4990          if (AT_WORD_BOUNDARY (d))
4991            break;
4992          goto fail;
4993
4994        case notwordbound:
4995          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4996          if (AT_WORD_BOUNDARY (d))
4997            goto fail;
4998          break;
4999#else
5000        case wordbound:
5001        {
5002          boolean prevchar, thischar;
5003
5004          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5005          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5006            break;
5007
5008          prevchar = WORDCHAR_P (d - 1);
5009          thischar = WORDCHAR_P (d);
5010          if (prevchar != thischar)
5011            break;
5012          goto fail;
5013        }
5014
5015      case notwordbound:
5016        {
5017          boolean prevchar, thischar;
5018
5019          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5020          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5021            goto fail;
5022
5023          prevchar = WORDCHAR_P (d - 1);
5024          thischar = WORDCHAR_P (d);
5025          if (prevchar != thischar)
5026            goto fail;
5027          break;
5028        }
5029#endif
5030
5031        case wordbeg:
5032          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5033          if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5034            break;
5035          goto fail;
5036
5037        case wordend:
5038          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5039          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5040              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5041            break;
5042          goto fail;
5043
5044#ifdef emacs
5045        case before_dot:
5046          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5047          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5048            goto fail;
5049          break;
5050
5051        case at_dot:
5052          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5053          if (PTR_CHAR_POS ((unsigned char *) d) != point)
5054            goto fail;
5055          break;
5056
5057        case after_dot:
5058          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5059          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5060            goto fail;
5061          break;
5062
5063        case syntaxspec:
5064          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5065          mcnt = *p++;
5066          goto matchsyntax;
5067
5068        case wordchar:
5069          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5070          mcnt = (int) Sword;
5071        matchsyntax:
5072          PREFETCH ();
5073          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5074          d++;
5075          if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5076            goto fail;
5077          SET_REGS_MATCHED ();
5078          break;
5079
5080        case notsyntaxspec:
5081          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5082          mcnt = *p++;
5083          goto matchnotsyntax;
5084
5085        case notwordchar:
5086          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5087          mcnt = (int) Sword;
5088        matchnotsyntax:
5089          PREFETCH ();
5090          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5091          d++;
5092          if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5093            goto fail;
5094          SET_REGS_MATCHED ();
5095          break;
5096
5097#else /* not emacs */
5098        case wordchar:
5099          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5100          PREFETCH ();
5101          if (!WORDCHAR_P (d))
5102            goto fail;
5103          SET_REGS_MATCHED ();
5104          d++;
5105          break;
5106
5107        case notwordchar:
5108          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5109          PREFETCH ();
5110          if (WORDCHAR_P (d))
5111            goto fail;
5112          SET_REGS_MATCHED ();
5113          d++;
5114          break;
5115#endif /* not emacs */
5116
5117        default:
5118          abort ();
5119        }
5120      continue;  /* Successfully executed one pattern command; keep going.  */
5121
5122
5123    /* We goto here if a matching operation fails. */
5124    fail:
5125      if (!FAIL_STACK_EMPTY ())
5126        { /* A restart point is known.  Restore to that state.  */
5127          DEBUG_PRINT1 ("\nFAIL:\n");
5128          POP_FAILURE_POINT (d, p,
5129                             lowest_active_reg, highest_active_reg,
5130                             regstart, regend, reg_info);
5131
5132          /* If this failure point is a dummy, try the next one.  */
5133          if (!p)
5134            goto fail;
5135
5136          /* If we failed to the end of the pattern, don't examine *p.  */
5137          assert (p <= pend);
5138          if (p < pend)
5139            {
5140              boolean is_a_jump_n = false;
5141
5142              /* If failed to a backwards jump that's part of a repetition
5143                 loop, need to pop this failure point and use the next one.  */
5144              switch ((re_opcode_t) *p)
5145                {
5146                case jump_n:
5147                  is_a_jump_n = true;
5148                case maybe_pop_jump:
5149                case pop_failure_jump:
5150                case jump:
5151                  p1 = p + 1;
5152                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5153                  p1 += mcnt;
5154
5155                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5156                      || (!is_a_jump_n
5157                          && (re_opcode_t) *p1 == on_failure_jump))
5158                    goto fail;
5159                  break;
5160                default:
5161                  /* do nothing */ ;
5162                }
5163            }
5164
5165          if (d >= string1 && d <= end1)
5166            dend = end_match_1;
5167        }
5168      else
5169        break;   /* Matching at this starting point really fails.  */
5170    } /* for (;;) */
5171
5172  if (best_regs_set)
5173    goto restore_best_regs;
5174
5175  FREE_VARIABLES ();
5176
5177  return -1;                            /* Failure to match.  */
5178} /* re_match_2 */
5179
5180/* Subroutine definitions for re_match_2.  */
5181
5182
5183/* We are passed P pointing to a register number after a start_memory.
5184
5185   Return true if the pattern up to the corresponding stop_memory can
5186   match the empty string, and false otherwise.
5187
5188   If we find the matching stop_memory, sets P to point to one past its number.
5189   Otherwise, sets P to an undefined byte less than or equal to END.
5190
5191   We don't handle duplicates properly (yet).  */
5192
5193static boolean
5194group_match_null_string_p (p, end, reg_info)
5195    unsigned char **p, *end;
5196    register_info_type *reg_info;
5197{
5198  int mcnt;
5199  /* Point to after the args to the start_memory.  */
5200  unsigned char *p1 = *p + 2;
5201
5202  while (p1 < end)
5203    {
5204      /* Skip over opcodes that can match nothing, and return true or
5205         false, as appropriate, when we get to one that can't, or to the
5206         matching stop_memory.  */
5207
5208      switch ((re_opcode_t) *p1)
5209        {
5210        /* Could be either a loop or a series of alternatives.  */
5211        case on_failure_jump:
5212          p1++;
5213          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5214
5215          /* If the next operation is not a jump backwards in the
5216             pattern.  */
5217
5218          if (mcnt >= 0)
5219            {
5220              /* Go through the on_failure_jumps of the alternatives,
5221                 seeing if any of the alternatives cannot match nothing.
5222                 The last alternative starts with only a jump,
5223                 whereas the rest start with on_failure_jump and end
5224                 with a jump, e.g., here is the pattern for `a|b|c':
5225
5226                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5227                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5228                 /exactn/1/c
5229
5230                 So, we have to first go through the first (n-1)
5231                 alternatives and then deal with the last one separately.  */
5232
5233
5234              /* Deal with the first (n-1) alternatives, which start
5235                 with an on_failure_jump (see above) that jumps to right
5236                 past a jump_past_alt.  */
5237
5238              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5239                {
5240                  /* `mcnt' holds how many bytes long the alternative
5241                     is, including the ending `jump_past_alt' and
5242                     its number.  */
5243
5244                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5245                                                      reg_info))
5246                    return false;
5247
5248                  /* Move to right after this alternative, including the
5249                     jump_past_alt.  */
5250                  p1 += mcnt;
5251
5252                  /* Break if it's the beginning of an n-th alternative
5253                     that doesn't begin with an on_failure_jump.  */
5254                  if ((re_opcode_t) *p1 != on_failure_jump)
5255                    break;
5256
5257                  /* Still have to check that it's not an n-th
5258                     alternative that starts with an on_failure_jump.  */
5259                  p1++;
5260                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5261                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5262                    {
5263                      /* Get to the beginning of the n-th alternative.  */
5264                      p1 -= 3;
5265                      break;
5266                    }
5267                }
5268
5269              /* Deal with the last alternative: go back and get number
5270                 of the `jump_past_alt' just before it.  `mcnt' contains
5271                 the length of the alternative.  */
5272              EXTRACT_NUMBER (mcnt, p1 - 2);
5273
5274              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5275                return false;
5276
5277              p1 += mcnt;       /* Get past the n-th alternative.  */
5278            } /* if mcnt > 0 */
5279          break;
5280
5281
5282        case stop_memory:
5283          assert (p1[1] == **p);
5284          *p = p1 + 2;
5285          return true;
5286
5287
5288        default:
5289          if (!common_op_match_null_string_p (&p1, end, reg_info))
5290            return false;
5291        }
5292    } /* while p1 < end */
5293
5294  return false;
5295} /* group_match_null_string_p */
5296
5297
5298/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5299   It expects P to be the first byte of a single alternative and END one
5300   byte past the last. The alternative can contain groups.  */
5301
5302static boolean
5303alt_match_null_string_p (p, end, reg_info)
5304    unsigned char *p, *end;
5305    register_info_type *reg_info;
5306{
5307  int mcnt;
5308  unsigned char *p1 = p;
5309
5310  while (p1 < end)
5311    {
5312      /* Skip over opcodes that can match nothing, and break when we get
5313         to one that can't.  */
5314
5315      switch ((re_opcode_t) *p1)
5316        {
5317        /* It's a loop.  */
5318        case on_failure_jump:
5319          p1++;
5320          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5321          p1 += mcnt;
5322          break;
5323
5324        default:
5325          if (!common_op_match_null_string_p (&p1, end, reg_info))
5326            return false;
5327        }
5328    }  /* while p1 < end */
5329
5330  return true;
5331} /* alt_match_null_string_p */
5332
5333
5334/* Deals with the ops common to group_match_null_string_p and
5335   alt_match_null_string_p.
5336
5337   Sets P to one after the op and its arguments, if any.  */
5338
5339static boolean
5340common_op_match_null_string_p (p, end, reg_info)
5341    unsigned char **p, *end;
5342    register_info_type *reg_info;
5343{
5344  int mcnt;
5345  boolean ret;
5346  int reg_no;
5347  unsigned char *p1 = *p;
5348
5349  switch ((re_opcode_t) *p1++)
5350    {
5351    case no_op:
5352    case begline:
5353    case endline:
5354    case begbuf:
5355    case endbuf:
5356    case wordbeg:
5357    case wordend:
5358    case wordbound:
5359    case notwordbound:
5360#ifdef emacs
5361    case before_dot:
5362    case at_dot:
5363    case after_dot:
5364#endif
5365      break;
5366
5367    case start_memory:
5368      reg_no = *p1;
5369      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5370      ret = group_match_null_string_p (&p1, end, reg_info);
5371
5372      /* Have to set this here in case we're checking a group which
5373         contains a group and a back reference to it.  */
5374
5375      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5376        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5377
5378      if (!ret)
5379        return false;
5380      break;
5381
5382    /* If this is an optimized succeed_n for zero times, make the jump.  */
5383    case jump:
5384      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5385      if (mcnt >= 0)
5386        p1 += mcnt;
5387      else
5388        return false;
5389      break;
5390
5391    case succeed_n:
5392      /* Get to the number of times to succeed.  */
5393      p1 += 2;
5394      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5395
5396      if (mcnt == 0)
5397        {
5398          p1 -= 4;
5399          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5400          p1 += mcnt;
5401        }
5402      else
5403        return false;
5404      break;
5405
5406    case duplicate:
5407      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5408        return false;
5409      break;
5410
5411    case set_number_at:
5412      p1 += 4;
5413
5414    default:
5415      /* All other opcodes mean we cannot match the empty string.  */
5416      return false;
5417  }
5418
5419  *p = p1;
5420  return true;
5421} /* common_op_match_null_string_p */
5422
5423
5424/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5425   bytes; nonzero otherwise.  */
5426
5427static int
5428bcmp_translate (s1, s2, len, translate)
5429     const char *s1, *s2;
5430     register int len;
5431     RE_TRANSLATE_TYPE translate;
5432{
5433  register const unsigned char *p1 = (const unsigned char *) s1;
5434  register const unsigned char *p2 = (const unsigned char *) s2;
5435  while (len)
5436    {
5437      if (translate[*p1++] != translate[*p2++]) return 1;
5438      len--;
5439    }
5440  return 0;
5441}
5442
5443/* Entry points for GNU code.  */
5444
5445/* re_compile_pattern is the GNU regular expression compiler: it
5446   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5447   Returns 0 if the pattern was valid, otherwise an error string.
5448
5449   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5450   are set in BUFP on entry.
5451
5452   We call regex_compile to do the actual compilation.  */
5453
5454const char *
5455re_compile_pattern (pattern, length, bufp)
5456     const char *pattern;
5457     size_t length;
5458     struct re_pattern_buffer *bufp;
5459{
5460  reg_errcode_t ret;
5461
5462  /* GNU code is written to assume at least RE_NREGS registers will be set
5463     (and at least one extra will be -1).  */
5464  bufp->regs_allocated = REGS_UNALLOCATED;
5465
5466  /* And GNU code determines whether or not to get register information
5467     by passing null for the REGS argument to re_match, etc., not by
5468     setting no_sub.  */
5469  bufp->no_sub = 0;
5470
5471  /* Match anchors at newline.  */
5472  bufp->newline_anchor = 1;
5473
5474  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5475
5476  if (!ret)
5477    return NULL;
5478  return gettext (re_error_msgid[(int) ret]);
5479}
5480#ifdef _LIBC
5481weak_alias (__re_compile_pattern, re_compile_pattern)
5482#endif
5483
5484/* Entry points compatible with 4.2 BSD regex library.  We don't define
5485   them unless specifically requested.  */
5486
5487#if defined _REGEX_RE_COMP || defined _LIBC
5488
5489/* BSD has one and only one pattern buffer.  */
5490static struct re_pattern_buffer re_comp_buf;
5491
5492char *
5493#ifdef _LIBC
5494/* Make these definitions weak in libc, so POSIX programs can redefine
5495   these names if they don't use our functions, and still use
5496   regcomp/regexec below without link errors.  */
5497weak_function
5498#endif
5499re_comp (s)
5500    const char *s;
5501{
5502  reg_errcode_t ret;
5503
5504  if (!s)
5505    {
5506      if (!re_comp_buf.buffer)
5507        return gettext ("No previous regular expression");
5508      return 0;
5509    }
5510
5511  if (!re_comp_buf.buffer)
5512    {
5513      re_comp_buf.buffer = (unsigned char *) malloc (200);
5514      if (re_comp_buf.buffer == NULL)
5515        return gettext (re_error_msgid[(int) REG_ESPACE]);
5516      re_comp_buf.allocated = 200;
5517
5518      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5519      if (re_comp_buf.fastmap == NULL)
5520        return gettext (re_error_msgid[(int) REG_ESPACE]);
5521    }
5522
5523  /* Since `re_exec' always passes NULL for the `regs' argument, we
5524     don't need to initialize the pattern buffer fields which affect it.  */
5525
5526  /* Match anchors at newlines.  */
5527  re_comp_buf.newline_anchor = 1;
5528
5529  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5530
5531  if (!ret)
5532    return NULL;
5533
5534  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5535  return (char *) gettext (re_error_msgid[(int) ret]);
5536}
5537
5538
5539int
5540#ifdef _LIBC
5541weak_function
5542#endif
5543re_exec (s)
5544    const char *s;
5545{
5546  const int len = strlen (s);
5547  return
5548    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5549}
5550
5551#endif /* _REGEX_RE_COMP */
5552
5553/* POSIX.2 functions.  Don't define these for Emacs.  */
5554
5555#ifndef emacs
5556
5557/* regcomp takes a regular expression as a string and compiles it.
5558
5559   PREG is a regex_t *.  We do not expect any fields to be initialized,
5560   since POSIX says we shouldn't.  Thus, we set
5561
5562     `buffer' to the compiled pattern;
5563     `used' to the length of the compiled pattern;
5564     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5565       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5566       RE_SYNTAX_POSIX_BASIC;
5567     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5568     `fastmap' and `fastmap_accurate' to zero;
5569     `re_nsub' to the number of subexpressions in PATTERN.
5570
5571   PATTERN is the address of the pattern string.
5572
5573   CFLAGS is a series of bits which affect compilation.
5574
5575     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5576     use POSIX basic syntax.
5577
5578     If REG_NEWLINE is set, then . and [^...] don't match newline.
5579     Also, regexec will try a match beginning after every newline.
5580
5581     If REG_ICASE is set, then we considers upper- and lowercase
5582     versions of letters to be equivalent when matching.
5583
5584     If REG_NOSUB is set, then when PREG is passed to regexec, that
5585     routine will report only success or failure, and nothing about the
5586     registers.
5587
5588   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex-gnu.h for
5589   the return codes and their meanings.)  */
5590
5591int
5592regcomp (preg, pattern, cflags)
5593    regex_t *preg;
5594    const char *pattern;
5595    int cflags;
5596{
5597  reg_errcode_t ret;
5598  reg_syntax_t syntax
5599    = (cflags & REG_EXTENDED) ?
5600      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5601
5602  /* regex_compile will allocate the space for the compiled pattern.  */
5603  preg->buffer = 0;
5604  preg->allocated = 0;
5605  preg->used = 0;
5606
5607  /* Don't bother to use a fastmap when searching.  This simplifies the
5608     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5609     characters after newlines into the fastmap.  This way, we just try
5610     every character.  */
5611  preg->fastmap = 0;
5612
5613  if (cflags & REG_ICASE)
5614    {
5615      unsigned i;
5616
5617      preg->translate
5618        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5619                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
5620      if (preg->translate == NULL)
5621        return (int) REG_ESPACE;
5622
5623      /* Map uppercase characters to corresponding lowercase ones.  */
5624      for (i = 0; i < CHAR_SET_SIZE; i++)
5625        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5626    }
5627  else
5628    preg->translate = NULL;
5629
5630  /* If REG_NEWLINE is set, newlines are treated differently.  */
5631  if (cflags & REG_NEWLINE)
5632    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5633      syntax &= ~RE_DOT_NEWLINE;
5634      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5635      /* It also changes the matching behavior.  */
5636      preg->newline_anchor = 1;
5637    }
5638  else
5639    preg->newline_anchor = 0;
5640
5641  preg->no_sub = !!(cflags & REG_NOSUB);
5642
5643  /* POSIX says a null character in the pattern terminates it, so we
5644     can use strlen here in compiling the pattern.  */
5645  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5646
5647  /* POSIX doesn't distinguish between an unmatched open-group and an
5648     unmatched close-group: both are REG_EPAREN.  */
5649  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5650
5651  return (int) ret;
5652}
5653#ifdef _LIBC
5654weak_alias (__regcomp, regcomp)
5655#endif
5656
5657
5658/* regnexec searches for a given pattern, specified by PREG, in the
5659   memory buffer STRING of lengthe LEN.
5660
5661   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5662   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5663   least NMATCH elements, and we set them to the offsets of the
5664   corresponding matched substrings.
5665
5666   EFLAGS specifies `execution flags' which affect matching: if
5667   REG_NOTBOL is set, then ^ does not match at the beginning of the
5668   string; if REG_NOTEOL is set, then $ does not match at the end.
5669
5670   We return 0 if we find a match and REG_NOMATCH if not.  */
5671
5672int
5673regnexec (preg, string, len, nmatch, pmatch, eflags)
5674    const regex_t *preg;
5675    const char *string;
5676    size_t len;
5677    size_t nmatch;
5678    regmatch_t pmatch[];
5679    int eflags;
5680{
5681  int ret;
5682  struct re_registers regs;
5683  regex_t private_preg;
5684  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5685
5686  private_preg = *preg;
5687
5688  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5689  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5690
5691  /* The user has told us exactly how many registers to return
5692     information about, via `nmatch'.  We have to pass that on to the
5693     matching routines.  */
5694  private_preg.regs_allocated = REGS_FIXED;
5695
5696  if (want_reg_info)
5697    {
5698      regs.num_regs = nmatch;
5699      regs.start = TALLOC (nmatch, regoff_t);
5700      regs.end = TALLOC (nmatch, regoff_t);
5701      if (regs.start == NULL || regs.end == NULL)
5702        return (int) REG_NOMATCH;
5703    }
5704
5705  /* Perform the searching operation.  */
5706  ret = re_search (&private_preg, string, len,
5707                   /* start: */ 0, /* range: */ len,
5708                   want_reg_info ? &regs : (struct re_registers *) 0);
5709
5710  /* Copy the register information to the POSIX structure.  */
5711  if (want_reg_info)
5712    {
5713      if (ret >= 0)
5714        {
5715          unsigned r;
5716
5717          for (r = 0; r < nmatch; r++)
5718            {
5719              pmatch[r].rm_so = regs.start[r];
5720              pmatch[r].rm_eo = regs.end[r];
5721            }
5722        }
5723
5724      /* If we needed the temporary register info, free the space now.  */
5725      free (regs.start);
5726      free (regs.end);
5727    }
5728
5729  /* We want zero return to mean success, unlike `re_search'.  */
5730  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5731}
5732#ifdef _LIBC
5733weak_alias (__regnexec, regnexec)
5734#endif
5735
5736/* regexec searches for a given pattern, specified by PREG, in the
5737   string STRING.
5738
5739   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5740   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5741   least NMATCH elements, and we set them to the offsets of the
5742   corresponding matched substrings.
5743
5744   EFLAGS specifies `execution flags' which affect matching: if
5745   REG_NOTBOL is set, then ^ does not match at the beginning of the
5746   string; if REG_NOTEOL is set, then $ does not match at the end.
5747
5748   We return 0 if we find a match and REG_NOMATCH if not.  */
5749
5750int
5751regexec (preg, string, nmatch, pmatch, eflags)
5752    const regex_t *preg;
5753    const char *string;
5754    size_t nmatch;
5755    regmatch_t pmatch[];
5756    int eflags;
5757{
5758  return regnexec(preg, string, strlen(string), nmatch, pmatch, eflags);
5759}
5760#ifdef _LIBC
5761weak_alias (__regexec, regexec)
5762#endif
5763
5764
5765/* Returns a message corresponding to an error code, ERRCODE, returned
5766   from either regcomp or regexec.   We don't use PREG here.  */
5767
5768size_t
5769regerror (errcode, preg, errbuf, errbuf_size)
5770    int errcode;
5771    const regex_t *preg;
5772    char *errbuf;
5773    size_t errbuf_size;
5774{
5775  const char *msg;
5776  size_t msg_size;
5777
5778  if (errcode < 0
5779      || errcode >= (int) (sizeof (re_error_msgid)
5780                           / sizeof (re_error_msgid[0])))
5781    /* Only error codes returned by the rest of the code should be passed
5782       to this routine.  If we are given anything else, or if other regex
5783       code generates an invalid error code, then the program has a bug.
5784       Dump core so we can fix it.  */
5785    abort ();
5786
5787  msg = gettext (re_error_msgid[errcode]);
5788
5789  msg_size = strlen (msg) + 1; /* Includes the null.  */
5790
5791  if (errbuf_size != 0)
5792    {
5793      if (msg_size > errbuf_size)
5794        {
5795#if defined HAVE_MEMPCPY || defined _LIBC
5796          *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5797#else
5798          memcpy (errbuf, msg, errbuf_size - 1);
5799          errbuf[errbuf_size - 1] = 0;
5800#endif
5801        }
5802      else
5803        memcpy (errbuf, msg, msg_size);
5804    }
5805
5806  return msg_size;
5807}
5808#ifdef _LIBC
5809weak_alias (__regerror, regerror)
5810#endif
5811
5812
5813/* Free dynamically allocated space used by PREG.  */
5814
5815void
5816regfree (preg)
5817    regex_t *preg;
5818{
5819  if (preg->buffer != NULL)
5820    free (preg->buffer);
5821  preg->buffer = NULL;
5822
5823  preg->allocated = 0;
5824  preg->used = 0;
5825
5826  if (preg->fastmap != NULL)
5827    free (preg->fastmap);
5828  preg->fastmap = NULL;
5829  preg->fastmap_accurate = 0;
5830
5831  if (preg->translate != NULL)
5832    free (preg->translate);
5833  preg->translate = NULL;
5834}
5835#ifdef _LIBC
5836weak_alias (__regfree, regfree)
5837#endif
5838
5839#endif /* not emacs  */
Note: See TracBrowser for help on using the repository browser.