source: trunk/third/diffutils/regex.c @ 16149

Revision 16149, 167.4 KB checked in by rbasch, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r16148, which included commits to RCS files with non-trunk default branches.
Line 
1/* Extended regular expression matching and search library,
2   version 0.12.
3   (Implements POSIX draft P10003.2/D11.2, except for
4   internationalization features.)
5
6   Copyright (C) 1993, 1994 Free Software Foundation, Inc.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24  #pragma alloca
25#endif
26
27#define _GNU_SOURCE
28
29#ifdef HAVE_CONFIG_H
30#include <config.h>
31#endif
32
33/* We need this for `regex.h', and perhaps for the Emacs include files.  */
34#include <sys/types.h>
35
36/* The `emacs' switch turns on certain matching commands
37   that make sense only in Emacs. */
38#ifdef emacs
39
40#include "lisp.h"
41#include "buffer.h"
42#include "syntax.h"
43
44/* Emacs uses `NULL' as a predicate.  */
45#undef NULL
46
47#else  /* not emacs */
48
49#ifdef STDC_HEADERS
50#include <stdlib.h>
51#else
52char *malloc ();
53char *realloc ();
54#endif
55
56
57/* We used to test for `BSTRING' here, but only GCC and Emacs define
58   `BSTRING', as far as I know, and neither of them use this code.  */
59#ifndef INHIBIT_STRING_HEADER
60#if HAVE_STRING_H || STDC_HEADERS
61#include <string.h>
62#ifndef bcmp
63#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
64#endif
65#ifndef bcopy
66#define bcopy(s, d, n)  memcpy ((d), (s), (n))
67#endif
68#ifndef bzero
69#define bzero(s, n)     memset ((s), 0, (n))
70#endif
71#else
72#include <strings.h>
73#endif
74#endif
75
76/* Define the syntax stuff for \<, \>, etc.  */
77
78/* This must be nonzero for the wordchar and notwordchar pattern
79   commands in re_match_2.  */
80#ifndef Sword
81#define Sword 1
82#endif
83
84#ifdef SYNTAX_TABLE
85
86extern char *re_syntax_table;
87
88#else /* not SYNTAX_TABLE */
89
90/* How many characters in the character set.  */
91#define CHAR_SET_SIZE 256
92
93static char re_syntax_table[CHAR_SET_SIZE];
94
95static void
96init_syntax_once ()
97{
98   register int c;
99   static int done = 0;
100
101   if (done)
102     return;
103
104   bzero (re_syntax_table, sizeof re_syntax_table);
105
106   for (c = 'a'; c <= 'z'; c++)
107     re_syntax_table[c] = Sword;
108
109   for (c = 'A'; c <= 'Z'; c++)
110     re_syntax_table[c] = Sword;
111
112   for (c = '0'; c <= '9'; c++)
113     re_syntax_table[c] = Sword;
114
115   re_syntax_table['_'] = Sword;
116
117   done = 1;
118}
119
120#endif /* not SYNTAX_TABLE */
121
122#define SYNTAX(c) re_syntax_table[c]
123
124#endif /* not emacs */
125
126/* Get the interface, including the syntax bits.  */
127#include "regex.h"
128
129/* isalpha etc. are used for the character classes.  */
130#include <ctype.h>
131
132/* Jim Meyering writes:
133
134   "... Some ctype macros are valid only for character codes that
135   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
136   using /bin/cc or gcc but without giving an ansi option).  So, all
137   ctype uses should be through macros like ISPRINT...  If
138   STDC_HEADERS is defined, then autoconf has verified that the ctype
139   macros don't need to be guarded with references to isascii. ...
140   Defining isascii to 1 should let any compiler worth its salt
141   eliminate the && through constant folding."  */
142
143#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
144#define ISASCII(c) 1
145#else
146#define ISASCII(c) isascii(c)
147#endif
148
149#ifdef isblank
150#define ISBLANK(c) (ISASCII (c) && isblank (c))
151#else
152#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
153#endif
154#ifdef isgraph
155#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
156#else
157#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
158#endif
159
160#define ISPRINT(c) (ISASCII (c) && isprint (c))
161#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
162#define ISALNUM(c) (ISASCII (c) && isalnum (c))
163#define ISALPHA(c) (ISASCII (c) && isalpha (c))
164#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
165#define ISLOWER(c) (ISASCII (c) && islower (c))
166#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
167#define ISSPACE(c) (ISASCII (c) && isspace (c))
168#define ISUPPER(c) (ISASCII (c) && isupper (c))
169#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
170
171#ifndef NULL
172#define NULL 0
173#endif
174
175/* We remove any previous definition of `SIGN_EXTEND_CHAR',
176   since ours (we hope) works properly with all combinations of
177   machines, compilers, `char' and `unsigned char' argument types.
178   (Per Bothner suggested the basic approach.)  */
179#undef SIGN_EXTEND_CHAR
180#if __STDC__
181#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
182#else  /* not __STDC__ */
183/* As in Harbison and Steele.  */
184#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
185#endif
186
187/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
188   use `alloca' instead of `malloc'.  This is because using malloc in
189   re_search* or re_match* could cause memory leaks when C-g is used in
190   Emacs; also, malloc is slower and causes storage fragmentation.  On
191   the other hand, malloc is more portable, and easier to debug. 
192   
193   Because we sometimes use alloca, some routines have to be macros,
194   not functions -- `alloca'-allocated space disappears at the end of the
195   function it is called in.  */
196
197#ifdef REGEX_MALLOC
198
199#define REGEX_ALLOCATE malloc
200#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
201
202#else /* not REGEX_MALLOC  */
203
204/* Emacs already defines alloca, sometimes.  */
205#ifndef alloca
206
207/* Make alloca work the best possible way.  */
208#ifdef __GNUC__
209#define alloca __builtin_alloca
210#else /* not __GNUC__ */
211#if HAVE_ALLOCA_H
212#include <alloca.h>
213#else /* not __GNUC__ or HAVE_ALLOCA_H */
214#ifndef _AIX /* Already did AIX, up at the top.  */
215char *alloca ();
216#endif /* not _AIX */
217#endif /* not HAVE_ALLOCA_H */
218#endif /* not __GNUC__ */
219
220#endif /* not alloca */
221
222#define REGEX_ALLOCATE alloca
223
224/* Assumes a `char *destination' variable.  */
225#define REGEX_REALLOCATE(source, osize, nsize)                          \
226  (destination = (char *) alloca (nsize),                               \
227   bcopy (source, destination, osize),                                  \
228   destination)
229
230#endif /* not REGEX_MALLOC */
231
232
233/* True if `size1' is non-NULL and PTR is pointing anywhere inside
234   `string1' or just past its end.  This works if PTR is NULL, which is
235   a good thing.  */
236#define FIRST_STRING_P(ptr)                                     \
237  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
238
239/* (Re)Allocate N items of type T using malloc, or fail.  */
240#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
241#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
242#define RETALLOC_IF(addr, n, t) \
243  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
244#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
245
246#define BYTEWIDTH 8 /* In bits.  */
247
248#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
249
250#undef MAX
251#undef MIN
252#define MAX(a, b) ((a) > (b) ? (a) : (b))
253#define MIN(a, b) ((a) < (b) ? (a) : (b))
254
255typedef char boolean;
256#define false 0
257#define true 1
258
259static int re_match_2_internal ();
260
261/* These are the command codes that appear in compiled regular
262   expressions.  Some opcodes are followed by argument bytes.  A
263   command code can specify any interpretation whatsoever for its
264   arguments.  Zero bytes may appear in the compiled regular expression.
265
266   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
267   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
268   `exactn' we use here must also be 1.  */
269
270typedef enum
271{
272  no_op = 0,
273
274        /* Followed by one byte giving n, then by n literal bytes.  */
275  exactn = 1,
276
277        /* Matches any (more or less) character.  */
278  anychar,
279
280        /* Matches any one char belonging to specified set.  First
281           following byte is number of bitmap bytes.  Then come bytes
282           for a bitmap saying which chars are in.  Bits in each byte
283           are ordered low-bit-first.  A character is in the set if its
284           bit is 1.  A character too large to have a bit in the map is
285           automatically not in the set.  */
286  charset,
287
288        /* Same parameters as charset, but match any character that is
289           not one of those specified.  */
290  charset_not,
291
292        /* Start remembering the text that is matched, for storing in a
293           register.  Followed by one byte with the register number, in
294           the range 0 to one less than the pattern buffer's re_nsub
295           field.  Then followed by one byte with the number of groups
296           inner to this one.  (This last has to be part of the
297           start_memory only because we need it in the on_failure_jump
298           of re_match_2.)  */
299  start_memory,
300
301        /* Stop remembering the text that is matched and store it in a
302           memory register.  Followed by one byte with the register
303           number, in the range 0 to one less than `re_nsub' in the
304           pattern buffer, and one byte with the number of inner groups,
305           just like `start_memory'.  (We need the number of inner
306           groups here because we don't have any easy way of finding the
307           corresponding start_memory when we're at a stop_memory.)  */
308  stop_memory,
309
310        /* Match a duplicate of something remembered. Followed by one
311           byte containing the register number.  */
312  duplicate,
313
314        /* Fail unless at beginning of line.  */
315  begline,
316
317        /* Fail unless at end of line.  */
318  endline,
319
320        /* Succeeds if at beginning of buffer (if emacs) or at beginning
321           of string to be matched (if not).  */
322  begbuf,
323
324        /* Analogously, for end of buffer/string.  */
325  endbuf,
326 
327        /* Followed by two byte relative address to which to jump.  */
328  jump,
329
330        /* Same as jump, but marks the end of an alternative.  */
331  jump_past_alt,
332
333        /* Followed by two-byte relative address of place to resume at
334           in case of failure.  */
335  on_failure_jump,
336       
337        /* Like on_failure_jump, but pushes a placeholder instead of the
338           current string position when executed.  */
339  on_failure_keep_string_jump,
340 
341        /* Throw away latest failure point and then jump to following
342           two-byte relative address.  */
343  pop_failure_jump,
344
345        /* Change to pop_failure_jump if know won't have to backtrack to
346           match; otherwise change to jump.  This is used to jump
347           back to the beginning of a repeat.  If what follows this jump
348           clearly won't match what the repeat does, such that we can be
349           sure that there is no use backtracking out of repetitions
350           already matched, then we change it to a pop_failure_jump.
351           Followed by two-byte address.  */
352  maybe_pop_jump,
353
354        /* Jump to following two-byte address, and push a dummy failure
355           point. This failure point will be thrown away if an attempt
356           is made to use it for a failure.  A `+' construct makes this
357           before the first repeat.  Also used as an intermediary kind
358           of jump when compiling an alternative.  */
359  dummy_failure_jump,
360
361        /* Push a dummy failure point and continue.  Used at the end of
362           alternatives.  */
363  push_dummy_failure,
364
365        /* Followed by two-byte relative address and two-byte number n.
366           After matching N times, jump to the address upon failure.  */
367  succeed_n,
368
369        /* Followed by two-byte relative address, and two-byte number n.
370           Jump to the address N times, then fail.  */
371  jump_n,
372
373        /* Set the following two-byte relative address to the
374           subsequent two-byte number.  The address *includes* the two
375           bytes of number.  */
376  set_number_at,
377
378  wordchar,     /* Matches any word-constituent character.  */
379  notwordchar,  /* Matches any char that is not a word-constituent.  */
380
381  wordbeg,      /* Succeeds if at word beginning.  */
382  wordend,      /* Succeeds if at word end.  */
383
384  wordbound,    /* Succeeds if at a word boundary.  */
385  notwordbound  /* Succeeds if not at a word boundary.  */
386
387#ifdef emacs
388  ,before_dot,  /* Succeeds if before point.  */
389  at_dot,       /* Succeeds if at point.  */
390  after_dot,    /* Succeeds if after point.  */
391
392        /* Matches any character whose syntax is specified.  Followed by
393           a byte which contains a syntax code, e.g., Sword.  */
394  syntaxspec,
395
396        /* Matches any character whose syntax is not that specified.  */
397  notsyntaxspec
398#endif /* emacs */
399} re_opcode_t;
400
401/* Common operations on the compiled pattern.  */
402
403/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
404
405#define STORE_NUMBER(destination, number)                               \
406  do {                                                                  \
407    (destination)[0] = (number) & 0377;                                 \
408    (destination)[1] = (number) >> 8;                                   \
409  } while (0)
410
411/* Same as STORE_NUMBER, except increment DESTINATION to
412   the byte after where the number is stored.  Therefore, DESTINATION
413   must be an lvalue.  */
414
415#define STORE_NUMBER_AND_INCR(destination, number)                      \
416  do {                                                                  \
417    STORE_NUMBER (destination, number);                                 \
418    (destination) += 2;                                                 \
419  } while (0)
420
421/* Put into DESTINATION a number stored in two contiguous bytes starting
422   at SOURCE.  */
423
424#define EXTRACT_NUMBER(destination, source)                             \
425  do {                                                                  \
426    (destination) = *(source) & 0377;                                   \
427    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
428  } while (0)
429
430#ifdef DEBUG
431static void
432extract_number (dest, source)
433    int *dest;
434    unsigned char *source;
435{
436  int temp = SIGN_EXTEND_CHAR (*(source + 1));
437  *dest = *source & 0377;
438  *dest += temp << 8;
439}
440
441#ifndef EXTRACT_MACROS /* To debug the macros.  */
442#undef EXTRACT_NUMBER
443#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
444#endif /* not EXTRACT_MACROS */
445
446#endif /* DEBUG */
447
448/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
449   SOURCE must be an lvalue.  */
450
451#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
452  do {                                                                  \
453    EXTRACT_NUMBER (destination, source);                               \
454    (source) += 2;                                                      \
455  } while (0)
456
457#ifdef DEBUG
458static void
459extract_number_and_incr (destination, source)
460    int *destination;
461    unsigned char **source;
462{
463  extract_number (destination, *source);
464  *source += 2;
465}
466
467#ifndef EXTRACT_MACROS
468#undef EXTRACT_NUMBER_AND_INCR
469#define EXTRACT_NUMBER_AND_INCR(dest, src) \
470  extract_number_and_incr (&dest, &src)
471#endif /* not EXTRACT_MACROS */
472
473#endif /* DEBUG */
474
475/* If DEBUG is defined, Regex prints many voluminous messages about what
476   it is doing (if the variable `debug' is nonzero).  If linked with the
477   main program in `iregex.c', you can enter patterns and strings
478   interactively.  And if linked with the main program in `main.c' and
479   the other test files, you can run the already-written tests.  */
480
481#ifdef DEBUG
482
483/* We use standard I/O for debugging.  */
484#include <stdio.h>
485
486/* It is useful to test things that ``must'' be true when debugging.  */
487#include <assert.h>
488
489static int debug = 0;
490
491#define DEBUG_STATEMENT(e) e
492#define DEBUG_PRINT1(x) if (debug) printf (x)
493#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
494#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
495#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
496#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
497  if (debug) print_partial_compiled_pattern (s, e)
498#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
499  if (debug) print_double_string (w, s1, sz1, s2, sz2)
500
501
502extern void printchar ();
503
504/* Print the fastmap in human-readable form.  */
505
506void
507print_fastmap (fastmap)
508    char *fastmap;
509{
510  unsigned was_a_range = 0;
511  unsigned i = 0; 
512 
513  while (i < (1 << BYTEWIDTH))
514    {
515      if (fastmap[i++])
516        {
517          was_a_range = 0;
518          printchar (i - 1);
519          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
520            {
521              was_a_range = 1;
522              i++;
523            }
524          if (was_a_range)
525            {
526              printf ("-");
527              printchar (i - 1);
528            }
529        }
530    }
531  putchar ('\n');
532}
533
534
535/* Print a compiled pattern string in human-readable form, starting at
536   the START pointer into it and ending just before the pointer END.  */
537
538void
539print_partial_compiled_pattern (start, end)
540    unsigned char *start;
541    unsigned char *end;
542{
543  int mcnt, mcnt2;
544  unsigned char *p = start;
545  unsigned char *pend = end;
546
547  if (start == NULL)
548    {
549      printf ("(null)\n");
550      return;
551    }
552   
553  /* Loop over pattern commands.  */
554  while (p < pend)
555    {
556      printf ("%d:\t", p - start);
557
558      switch ((re_opcode_t) *p++)
559        {
560        case no_op:
561          printf ("/no_op");
562          break;
563
564        case exactn:
565          mcnt = *p++;
566          printf ("/exactn/%d", mcnt);
567          do
568            {
569              putchar ('/');
570              printchar (*p++);
571            }
572          while (--mcnt);
573          break;
574
575        case start_memory:
576          mcnt = *p++;
577          printf ("/start_memory/%d/%d", mcnt, *p++);
578          break;
579
580        case stop_memory:
581          mcnt = *p++;
582          printf ("/stop_memory/%d/%d", mcnt, *p++);
583          break;
584
585        case duplicate:
586          printf ("/duplicate/%d", *p++);
587          break;
588
589        case anychar:
590          printf ("/anychar");
591          break;
592
593        case charset:
594        case charset_not:
595          {
596            register int c, last = -100;
597            register int in_range = 0;
598
599            printf ("/charset [%s",
600                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
601           
602            assert (p + *p < pend);
603
604            for (c = 0; c < 256; c++)
605              if (c / 8 < *p
606                  && (p[1 + (c/8)] & (1 << (c % 8))))
607                {
608                  /* Are we starting a range?  */
609                  if (last + 1 == c && ! in_range)
610                    {
611                      putchar ('-');
612                      in_range = 1;
613                    }
614                  /* Have we broken a range?  */
615                  else if (last + 1 != c && in_range)
616              {
617                      printchar (last);
618                      in_range = 0;
619                    }
620               
621                  if (! in_range)
622                    printchar (c);
623
624                  last = c;
625              }
626
627            if (in_range)
628              printchar (last);
629
630            putchar (']');
631
632            p += 1 + *p;
633          }
634          break;
635
636        case begline:
637          printf ("/begline");
638          break;
639
640        case endline:
641          printf ("/endline");
642          break;
643
644        case on_failure_jump:
645          extract_number_and_incr (&mcnt, &p);
646          printf ("/on_failure_jump to %d", p + mcnt - start);
647          break;
648
649        case on_failure_keep_string_jump:
650          extract_number_and_incr (&mcnt, &p);
651          printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
652          break;
653
654        case dummy_failure_jump:
655          extract_number_and_incr (&mcnt, &p);
656          printf ("/dummy_failure_jump to %d", p + mcnt - start);
657          break;
658
659        case push_dummy_failure:
660          printf ("/push_dummy_failure");
661          break;
662         
663        case maybe_pop_jump:
664          extract_number_and_incr (&mcnt, &p);
665          printf ("/maybe_pop_jump to %d", p + mcnt - start);
666          break;
667
668        case pop_failure_jump:
669          extract_number_and_incr (&mcnt, &p);
670          printf ("/pop_failure_jump to %d", p + mcnt - start);
671          break;         
672         
673        case jump_past_alt:
674          extract_number_and_incr (&mcnt, &p);
675          printf ("/jump_past_alt to %d", p + mcnt - start);
676          break;         
677         
678        case jump:
679          extract_number_and_incr (&mcnt, &p);
680          printf ("/jump to %d", p + mcnt - start);
681          break;
682
683        case succeed_n:
684          extract_number_and_incr (&mcnt, &p);
685          extract_number_and_incr (&mcnt2, &p);
686          printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
687          break;
688       
689        case jump_n:
690          extract_number_and_incr (&mcnt, &p);
691          extract_number_and_incr (&mcnt2, &p);
692          printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
693          break;
694       
695        case set_number_at:
696          extract_number_and_incr (&mcnt, &p);
697          extract_number_and_incr (&mcnt2, &p);
698          printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
699          break;
700       
701        case wordbound:
702          printf ("/wordbound");
703          break;
704
705        case notwordbound:
706          printf ("/notwordbound");
707          break;
708
709        case wordbeg:
710          printf ("/wordbeg");
711          break;
712         
713        case wordend:
714          printf ("/wordend");
715         
716#ifdef emacs
717        case before_dot:
718          printf ("/before_dot");
719          break;
720
721        case at_dot:
722          printf ("/at_dot");
723          break;
724
725        case after_dot:
726          printf ("/after_dot");
727          break;
728
729        case syntaxspec:
730          printf ("/syntaxspec");
731          mcnt = *p++;
732          printf ("/%d", mcnt);
733          break;
734         
735        case notsyntaxspec:
736          printf ("/notsyntaxspec");
737          mcnt = *p++;
738          printf ("/%d", mcnt);
739          break;
740#endif /* emacs */
741
742        case wordchar:
743          printf ("/wordchar");
744          break;
745         
746        case notwordchar:
747          printf ("/notwordchar");
748          break;
749
750        case begbuf:
751          printf ("/begbuf");
752          break;
753
754        case endbuf:
755          printf ("/endbuf");
756          break;
757
758        default:
759          printf ("?%d", *(p-1));
760        }
761
762      putchar ('\n');
763    }
764
765  printf ("%d:\tend of pattern.\n", p - start);
766}
767
768
769void
770print_compiled_pattern (bufp)
771    struct re_pattern_buffer *bufp;
772{
773  unsigned char *buffer = bufp->buffer;
774
775  print_partial_compiled_pattern (buffer, buffer + bufp->used);
776  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
777
778  if (bufp->fastmap_accurate && bufp->fastmap)
779    {
780      printf ("fastmap: ");
781      print_fastmap (bufp->fastmap);
782    }
783
784  printf ("re_nsub: %d\t", bufp->re_nsub);
785  printf ("regs_alloc: %d\t", bufp->regs_allocated);
786  printf ("can_be_null: %d\t", bufp->can_be_null);
787  printf ("newline_anchor: %d\n", bufp->newline_anchor);
788  printf ("no_sub: %d\t", bufp->no_sub);
789  printf ("not_bol: %d\t", bufp->not_bol);
790  printf ("not_eol: %d\t", bufp->not_eol);
791  printf ("syntax: %d\n", bufp->syntax);
792  /* Perhaps we should print the translate table?  */
793}
794
795
796void
797print_double_string (where, string1, size1, string2, size2)
798    const char *where;
799    const char *string1;
800    const char *string2;
801    int size1;
802    int size2;
803{
804  unsigned this_char;
805 
806  if (where == NULL)
807    printf ("(null)");
808  else
809    {
810      if (FIRST_STRING_P (where))
811        {
812          for (this_char = where - string1; this_char < size1; this_char++)
813            printchar (string1[this_char]);
814
815          where = string2;   
816        }
817
818      for (this_char = where - string2; this_char < size2; this_char++)
819        printchar (string2[this_char]);
820    }
821}
822
823#else /* not DEBUG */
824
825#undef assert
826#define assert(e)
827
828#define DEBUG_STATEMENT(e)
829#define DEBUG_PRINT1(x)
830#define DEBUG_PRINT2(x1, x2)
831#define DEBUG_PRINT3(x1, x2, x3)
832#define DEBUG_PRINT4(x1, x2, x3, x4)
833#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
834#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
835
836#endif /* not DEBUG */
837
838/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
839   also be assigned to arbitrarily: each pattern buffer stores its own
840   syntax, so it can be changed between regex compilations.  */
841reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
842
843
844/* Specify the precise syntax of regexps for compilation.  This provides
845   for compatibility for various utilities which historically have
846   different, incompatible syntaxes.
847
848   The argument SYNTAX is a bit mask comprised of the various bits
849   defined in regex.h.  We return the old syntax.  */
850
851reg_syntax_t
852re_set_syntax (syntax)
853    reg_syntax_t syntax;
854{
855  reg_syntax_t ret = re_syntax_options;
856 
857  re_syntax_options = syntax;
858  return ret;
859}
860
861/* This table gives an error message for each of the error codes listed
862   in regex.h.  Obviously the order here has to be same as there.  */
863
864static const char *re_error_msg[] =
865  { NULL,                                       /* REG_NOERROR */
866    "No match",                                 /* REG_NOMATCH */
867    "Invalid regular expression",               /* REG_BADPAT */
868    "Invalid collation character",              /* REG_ECOLLATE */
869    "Invalid character class name",             /* REG_ECTYPE */
870    "Trailing backslash",                       /* REG_EESCAPE */
871    "Invalid back reference",                   /* REG_ESUBREG */
872    "Unmatched [ or [^",                        /* REG_EBRACK */
873    "Unmatched ( or \\(",                       /* REG_EPAREN */
874    "Unmatched \\{",                            /* REG_EBRACE */
875    "Invalid content of \\{\\}",                /* REG_BADBR */
876    "Invalid range end",                        /* REG_ERANGE */
877    "Memory exhausted",                         /* REG_ESPACE */
878    "Invalid preceding regular expression",     /* REG_BADRPT */
879    "Premature end of regular expression",      /* REG_EEND */
880    "Regular expression too big",               /* REG_ESIZE */
881    "Unmatched ) or \\)",                       /* REG_ERPAREN */
882  };
883
884/* Avoiding alloca during matching, to placate r_alloc.  */
885
886/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
887   searching and matching functions should not call alloca.  On some
888   systems, alloca is implemented in terms of malloc, and if we're
889   using the relocating allocator routines, then malloc could cause a
890   relocation, which might (if the strings being searched are in the
891   ralloc heap) shift the data out from underneath the regexp
892   routines.
893
894   Here's another reason to avoid allocation: Emacs
895   processes input from X in a signal handler; processing X input may
896   call malloc; if input arrives while a matching routine is calling
897   malloc, then we're scrod.  But Emacs can't just block input while
898   calling matching routines; then we don't notice interrupts when
899   they come in.  So, Emacs blocks input around all regexp calls
900   except the matching calls, which it leaves unprotected, in the
901   faith that they will not malloc.  */
902
903/* Normally, this is fine.  */
904#define MATCH_MAY_ALLOCATE
905
906/* The match routines may not allocate if (1) they would do it with malloc
907   and (2) it's not safe for htem to use malloc.  */
908#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC))
909#undef MATCH_MAY_ALLOCATE
910#endif
911
912
913/* Failure stack declarations and macros; both re_compile_fastmap and
914   re_match_2 use a failure stack.  These have to be macros because of
915   REGEX_ALLOCATE.  */
916   
917
918/* Number of failure points for which to initially allocate space
919   when matching.  If this number is exceeded, we allocate more
920   space, so it is not a hard limit.  */
921#ifndef INIT_FAILURE_ALLOC
922#define INIT_FAILURE_ALLOC 5
923#endif
924
925/* Roughly the maximum number of failure points on the stack.  Would be
926   exactly that if always used MAX_FAILURE_SPACE each time we failed.
927   This is a variable only so users of regex can assign to it; we never
928   change it ourselves.  */
929int re_max_failures = 2000;
930
931typedef unsigned char *fail_stack_elt_t;
932
933typedef struct
934{
935  fail_stack_elt_t *stack;
936  unsigned size;
937  unsigned avail;                       /* Offset of next open position.  */
938} fail_stack_type;
939
940#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
941#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
942#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
943#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
944
945
946/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
947
948#ifdef MATCH_MAY_ALLOCATE
949#define INIT_FAIL_STACK()                                               \
950  do {                                                                  \
951    fail_stack.stack = (fail_stack_elt_t *)                             \
952      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
953                                                                        \
954    if (fail_stack.stack == NULL)                                       \
955      return -2;                                                        \
956                                                                        \
957    fail_stack.size = INIT_FAILURE_ALLOC;                               \
958    fail_stack.avail = 0;                                               \
959  } while (0)
960#else
961#define INIT_FAIL_STACK()                                               \
962  do {                                                                  \
963    fail_stack.avail = 0;                                               \
964  } while (0)
965#endif
966
967
968/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
969
970   Return 1 if succeeds, and 0 if either ran out of memory
971   allocating space for it or it was already too large. 
972   
973   REGEX_REALLOCATE requires `destination' be declared.   */
974
975#define DOUBLE_FAIL_STACK(fail_stack)                                   \
976  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
977   ? 0                                                                  \
978   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
979        REGEX_REALLOCATE ((fail_stack).stack,                           \
980          (fail_stack).size * sizeof (fail_stack_elt_t),                \
981          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
982                                                                        \
983      (fail_stack).stack == NULL                                        \
984      ? 0                                                               \
985      : ((fail_stack).size <<= 1,                                       \
986         1)))
987
988
989/* Push PATTERN_OP on FAIL_STACK.
990
991   Return 1 if was able to do so and 0 if ran out of memory allocating
992   space to do so.  */
993#define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
994  ((FAIL_STACK_FULL ()                                                  \
995    && !DOUBLE_FAIL_STACK (fail_stack))                                 \
996    ? 0                                                                 \
997    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,           \
998       1))
999
1000/* This pushes an item onto the failure stack.  Must be a four-byte
1001   value.  Assumes the variable `fail_stack'.  Probably should only
1002   be called from within `PUSH_FAILURE_POINT'.  */
1003#define PUSH_FAILURE_ITEM(item)                                         \
1004  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
1005
1006/* The complement operation.  Assumes `fail_stack' is nonempty.  */
1007#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
1008
1009/* Used to omit pushing failure point id's when we're not debugging.  */
1010#ifdef DEBUG
1011#define DEBUG_PUSH PUSH_FAILURE_ITEM
1012#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
1013#else
1014#define DEBUG_PUSH(item)
1015#define DEBUG_POP(item_addr)
1016#endif
1017
1018
1019/* Push the information about the state we will need
1020   if we ever fail back to it. 
1021   
1022   Requires variables fail_stack, regstart, regend, reg_info, and
1023   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1024   declared.
1025   
1026   Does `return FAILURE_CODE' if runs out of memory.  */
1027
1028#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1029  do {                                                                  \
1030    char *destination;                                                  \
1031    /* Must be int, so when we don't save any registers, the arithmetic \
1032       of 0 + -1 isn't done as unsigned.  */                            \
1033    int this_reg;                                                       \
1034                                                                        \
1035    DEBUG_STATEMENT (failure_id++);                                     \
1036    DEBUG_STATEMENT (nfailure_points_pushed++);                         \
1037    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
1038    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1039    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1040                                                                        \
1041    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
1042    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
1043                                                                        \
1044    /* Ensure we have enough space allocated for what we will push.  */ \
1045    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
1046      {                                                                 \
1047        if (!DOUBLE_FAIL_STACK (fail_stack))                    \
1048          return failure_code;                                          \
1049                                                                        \
1050        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
1051                       (fail_stack).size);                              \
1052        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1053      }                                                                 \
1054                                                                        \
1055    /* Push the info, starting with the registers.  */                  \
1056    DEBUG_PRINT1 ("\n");                                                \
1057                                                                        \
1058    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
1059         this_reg++)                                                    \
1060      {                                                                 \
1061        DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
1062        DEBUG_STATEMENT (num_regs_pushed++);                            \
1063                                                                        \
1064        DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
1065        PUSH_FAILURE_ITEM (regstart[this_reg]);                         \
1066                                                                        \
1067        DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
1068        PUSH_FAILURE_ITEM (regend[this_reg]);                           \
1069                                                                        \
1070        DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
1071        DEBUG_PRINT2 (" match_null=%d",                                 \
1072                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
1073        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
1074        DEBUG_PRINT2 (" matched_something=%d",                          \
1075                      MATCHED_SOMETHING (reg_info[this_reg]));          \
1076        DEBUG_PRINT2 (" ever_matched=%d",                               \
1077                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
1078        DEBUG_PRINT1 ("\n");                                            \
1079        PUSH_FAILURE_ITEM (reg_info[this_reg].word);                    \
1080      }                                                                 \
1081                                                                        \
1082    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1083    PUSH_FAILURE_ITEM (lowest_active_reg);                              \
1084                                                                        \
1085    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1086    PUSH_FAILURE_ITEM (highest_active_reg);                             \
1087                                                                        \
1088    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
1089    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
1090    PUSH_FAILURE_ITEM (pattern_place);                                  \
1091                                                                        \
1092    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
1093    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1094                                 size2);                                \
1095    DEBUG_PRINT1 ("'\n");                                               \
1096    PUSH_FAILURE_ITEM (string_place);                                   \
1097                                                                        \
1098    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
1099    DEBUG_PUSH (failure_id);                                            \
1100  } while (0)
1101
1102/* This is the number of items that are pushed and popped on the stack
1103   for each register.  */
1104#define NUM_REG_ITEMS  3
1105
1106/* Individual items aside from the registers.  */
1107#ifdef DEBUG
1108#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1109#else
1110#define NUM_NONREG_ITEMS 4
1111#endif
1112
1113/* We push at most this many items on the stack.  */
1114#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1115
1116/* We actually push this many items.  */
1117#define NUM_FAILURE_ITEMS                                               \
1118  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
1119    + NUM_NONREG_ITEMS)
1120
1121/* How many items can still be added to the stack without overflowing it.  */
1122#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1123
1124
1125/* Pops what PUSH_FAIL_STACK pushes.
1126
1127   We restore into the parameters, all of which should be lvalues:
1128     STR -- the saved data position.
1129     PAT -- the saved pattern position.
1130     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1131     REGSTART, REGEND -- arrays of string positions.
1132     REG_INFO -- array of information about each subexpression.
1133   
1134   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1135   `pend', `string1', `size1', `string2', and `size2'.  */
1136
1137#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1138{                                                                       \
1139  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
1140  int this_reg;                                                         \
1141  const unsigned char *string_temp;                                     \
1142                                                                        \
1143  assert (!FAIL_STACK_EMPTY ());                                        \
1144                                                                        \
1145  /* Remove failure points and point to how many regs pushed.  */       \
1146  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1147  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1148  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1149                                                                        \
1150  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
1151                                                                        \
1152  DEBUG_POP (&failure_id);                                              \
1153  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
1154                                                                        \
1155  /* If the saved string location is NULL, it came from an              \
1156     on_failure_keep_string_jump opcode, and we want to throw away the  \
1157     saved NULL, thus retaining our current position in the string.  */ \
1158  string_temp = POP_FAILURE_ITEM ();                                    \
1159  if (string_temp != NULL)                                              \
1160    str = (const char *) string_temp;                                   \
1161                                                                        \
1162  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
1163  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1164  DEBUG_PRINT1 ("'\n");                                                 \
1165                                                                        \
1166  pat = (unsigned char *) POP_FAILURE_ITEM ();                          \
1167  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
1168  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1169                                                                        \
1170  /* Restore register info.  */                                         \
1171  high_reg = (unsigned) POP_FAILURE_ITEM ();                            \
1172  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
1173                                                                        \
1174  low_reg = (unsigned) POP_FAILURE_ITEM ();                             \
1175  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
1176                                                                        \
1177  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
1178    {                                                                   \
1179      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                 \
1180                                                                        \
1181      reg_info[this_reg].word = POP_FAILURE_ITEM ();                    \
1182      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);          \
1183                                                                        \
1184      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();            \
1185      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);             \
1186                                                                        \
1187      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();          \
1188      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);         \
1189    }                                                                   \
1190                                                                        \
1191  DEBUG_STATEMENT (nfailure_points_popped++);                           \
1192} /* POP_FAILURE_POINT */
1193
1194
1195
1196/* Structure for per-register (a.k.a. per-group) information.
1197   This must not be longer than one word, because we push this value
1198   onto the failure stack.  Other register information, such as the
1199   starting and ending positions (which are addresses), and the list of
1200   inner groups (which is a bits list) are maintained in separate
1201   variables. 
1202   
1203   We are making a (strictly speaking) nonportable assumption here: that
1204   the compiler will pack our bit fields into something that fits into
1205   the type of `word', i.e., is something that fits into one item on the
1206   failure stack.  */
1207typedef union
1208{
1209  fail_stack_elt_t word;
1210  struct
1211  {
1212      /* This field is one if this group can match the empty string,
1213         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1214#define MATCH_NULL_UNSET_VALUE 3
1215    unsigned match_null_string_p : 2;
1216    unsigned is_active : 1;
1217    unsigned matched_something : 1;
1218    unsigned ever_matched_something : 1;
1219  } bits;
1220} register_info_type;
1221
1222#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1223#define IS_ACTIVE(R)  ((R).bits.is_active)
1224#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1225#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1226
1227
1228/* Call this when have matched a real character; it sets `matched' flags
1229   for the subexpressions which we are currently inside.  Also records
1230   that those subexprs have matched.  */
1231#define SET_REGS_MATCHED()                                              \
1232  do                                                                    \
1233    {                                                                   \
1234      unsigned r;                                                       \
1235      for (r = lowest_active_reg; r <= highest_active_reg; r++)         \
1236        {                                                               \
1237          MATCHED_SOMETHING (reg_info[r])                               \
1238            = EVER_MATCHED_SOMETHING (reg_info[r])                      \
1239            = 1;                                                        \
1240        }                                                               \
1241    }                                                                   \
1242  while (0)
1243
1244
1245/* Registers are set to a sentinel when they haven't yet matched.  */
1246#define REG_UNSET_VALUE ((char *) -1)
1247#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1248
1249
1250
1251/* How do we implement a missing MATCH_MAY_ALLOCATE?
1252   We make the fail stack a global thing, and then grow it to
1253   re_max_failures when we compile.  */
1254#ifndef MATCH_MAY_ALLOCATE
1255static fail_stack_type fail_stack;
1256
1257static const char **     regstart, **     regend;
1258static const char ** old_regstart, ** old_regend;
1259static const char **best_regstart, **best_regend;
1260static register_info_type *reg_info;
1261static const char **reg_dummy;
1262static register_info_type *reg_info_dummy;
1263#endif
1264
1265
1266/* Subroutine declarations and macros for regex_compile.  */
1267
1268static void store_op1 (), store_op2 ();
1269static void insert_op1 (), insert_op2 ();
1270static boolean at_begline_loc_p (), at_endline_loc_p ();
1271static boolean group_in_compile_stack ();
1272static reg_errcode_t compile_range ();
1273
1274/* Fetch the next character in the uncompiled pattern---translating it
1275   if necessary.  Also cast from a signed character in the constant
1276   string passed to us by the user to an unsigned char that we can use
1277   as an array index (in, e.g., `translate').  */
1278#define PATFETCH(c)                                                     \
1279  do {if (p == pend) return REG_EEND;                                   \
1280    c = (unsigned char) *p++;                                           \
1281    if (translate) c = translate[c];                                    \
1282  } while (0)
1283
1284/* Fetch the next character in the uncompiled pattern, with no
1285   translation.  */
1286#define PATFETCH_RAW(c)                                                 \
1287  do {if (p == pend) return REG_EEND;                                   \
1288    c = (unsigned char) *p++;                                           \
1289  } while (0)
1290
1291/* Go backwards one character in the pattern.  */
1292#define PATUNFETCH p--
1293
1294
1295/* If `translate' is non-null, return translate[D], else just D.  We
1296   cast the subscript to translate because some data is declared as
1297   `char *', to avoid warnings when a string constant is passed.  But
1298   when we use a character as a subscript we must make it unsigned.  */
1299#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
1300
1301
1302/* Macros for outputting the compiled pattern into `buffer'.  */
1303
1304/* If the buffer isn't allocated when it comes in, use this.  */
1305#define INIT_BUF_SIZE  32
1306
1307/* Make sure we have at least N more bytes of space in buffer.  */
1308#define GET_BUFFER_SPACE(n)                                             \
1309    while (b - bufp->buffer + (n) > bufp->allocated)                    \
1310      EXTEND_BUFFER ()
1311
1312/* Make sure we have one more byte of buffer space and then add C to it.  */
1313#define BUF_PUSH(c)                                                     \
1314  do {                                                                  \
1315    GET_BUFFER_SPACE (1);                                               \
1316    *b++ = (unsigned char) (c);                                         \
1317  } while (0)
1318
1319
1320/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1321#define BUF_PUSH_2(c1, c2)                                              \
1322  do {                                                                  \
1323    GET_BUFFER_SPACE (2);                                               \
1324    *b++ = (unsigned char) (c1);                                        \
1325    *b++ = (unsigned char) (c2);                                        \
1326  } while (0)
1327
1328
1329/* As with BUF_PUSH_2, except for three bytes.  */
1330#define BUF_PUSH_3(c1, c2, c3)                                          \
1331  do {                                                                  \
1332    GET_BUFFER_SPACE (3);                                               \
1333    *b++ = (unsigned char) (c1);                                        \
1334    *b++ = (unsigned char) (c2);                                        \
1335    *b++ = (unsigned char) (c3);                                        \
1336  } while (0)
1337
1338
1339/* Store a jump with opcode OP at LOC to location TO.  We store a
1340   relative address offset by the three bytes the jump itself occupies.  */
1341#define STORE_JUMP(op, loc, to) \
1342  store_op1 (op, loc, (to) - (loc) - 3)
1343
1344/* Likewise, for a two-argument jump.  */
1345#define STORE_JUMP2(op, loc, to, arg) \
1346  store_op2 (op, loc, (to) - (loc) - 3, arg)
1347
1348/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1349#define INSERT_JUMP(op, loc, to) \
1350  insert_op1 (op, loc, (to) - (loc) - 3, b)
1351
1352/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1353#define INSERT_JUMP2(op, loc, to, arg) \
1354  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1355
1356
1357/* This is not an arbitrary limit: the arguments which represent offsets
1358   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1359   be too small, many things would have to change.  */
1360#define MAX_BUF_SIZE (1L << 16)
1361
1362
1363/* Extend the buffer by twice its current size via realloc and
1364   reset the pointers that pointed into the old block to point to the
1365   correct places in the new one.  If extending the buffer results in it
1366   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1367#define EXTEND_BUFFER()                                                 \
1368  do {                                                                  \
1369    unsigned char *old_buffer = bufp->buffer;                           \
1370    if (bufp->allocated == MAX_BUF_SIZE)                                \
1371      return REG_ESIZE;                                                 \
1372    bufp->allocated <<= 1;                                              \
1373    if (bufp->allocated > MAX_BUF_SIZE)                                 \
1374      bufp->allocated = MAX_BUF_SIZE;                                   \
1375    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1376    if (bufp->buffer == NULL)                                           \
1377      return REG_ESPACE;                                                \
1378    /* If the buffer moved, move all the pointers into it.  */          \
1379    if (old_buffer != bufp->buffer)                                     \
1380      {                                                                 \
1381        b = (b - old_buffer) + bufp->buffer;                            \
1382        begalt = (begalt - old_buffer) + bufp->buffer;                  \
1383        if (fixup_alt_jump)                                             \
1384          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1385        if (laststart)                                                  \
1386          laststart = (laststart - old_buffer) + bufp->buffer;          \
1387        if (pending_exact)                                              \
1388          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
1389      }                                                                 \
1390  } while (0)
1391
1392
1393/* Since we have one byte reserved for the register number argument to
1394   {start,stop}_memory, the maximum number of groups we can report
1395   things about is what fits in that byte.  */
1396#define MAX_REGNUM 255
1397
1398/* But patterns can have more than `MAX_REGNUM' registers.  We just
1399   ignore the excess.  */
1400typedef unsigned regnum_t;
1401
1402
1403/* Macros for the compile stack.  */
1404
1405/* Since offsets can go either forwards or backwards, this type needs to
1406   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1407typedef int pattern_offset_t;
1408
1409typedef struct
1410{
1411  pattern_offset_t begalt_offset;
1412  pattern_offset_t fixup_alt_jump;
1413  pattern_offset_t inner_group_offset;
1414  pattern_offset_t laststart_offset; 
1415  regnum_t regnum;
1416} compile_stack_elt_t;
1417
1418
1419typedef struct
1420{
1421  compile_stack_elt_t *stack;
1422  unsigned size;
1423  unsigned avail;                       /* Offset of next open position.  */
1424} compile_stack_type;
1425
1426
1427#define INIT_COMPILE_STACK_SIZE 32
1428
1429#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1430#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1431
1432/* The next available element.  */
1433#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1434
1435
1436/* Set the bit for character C in a list.  */
1437#define SET_LIST_BIT(c)                               \
1438  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1439   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1440
1441
1442/* Get the next unsigned number in the uncompiled pattern.  */
1443#define GET_UNSIGNED_NUMBER(num)                                        \
1444  { if (p != pend)                                                      \
1445     {                                                                  \
1446       PATFETCH (c);                                                    \
1447       while (ISDIGIT (c))                                              \
1448         {                                                              \
1449           if (num < 0)                                                 \
1450              num = 0;                                                  \
1451           num = num * 10 + c - '0';                                    \
1452           if (p == pend)                                               \
1453              break;                                                    \
1454           PATFETCH (c);                                                \
1455         }                                                              \
1456       }                                                                \
1457    }           
1458
1459#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1460
1461#define IS_CHAR_CLASS(string)                                           \
1462   (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1463    || STREQ (string, "lower") || STREQ (string, "digit")               \
1464    || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1465    || STREQ (string, "space") || STREQ (string, "print")               \
1466    || STREQ (string, "punct") || STREQ (string, "graph")               \
1467    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1468
1469/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1470   Returns one of error codes defined in `regex.h', or zero for success.
1471
1472   Assumes the `allocated' (and perhaps `buffer') and `translate'
1473   fields are set in BUFP on entry.
1474
1475   If it succeeds, results are put in BUFP (if it returns an error, the
1476   contents of BUFP are undefined):
1477     `buffer' is the compiled pattern;
1478     `syntax' is set to SYNTAX;
1479     `used' is set to the length of the compiled pattern;
1480     `fastmap_accurate' is zero;
1481     `re_nsub' is the number of subexpressions in PATTERN;
1482     `not_bol' and `not_eol' are zero;
1483   
1484   The `fastmap' and `newline_anchor' fields are neither
1485   examined nor set.  */
1486
1487/* Return, freeing storage we allocated.  */
1488#define FREE_STACK_RETURN(value)                \
1489  return (free (compile_stack.stack), value)
1490
1491static reg_errcode_t
1492regex_compile (pattern, size, syntax, bufp)
1493     const char *pattern;
1494     int size;
1495     reg_syntax_t syntax;
1496     struct re_pattern_buffer *bufp;
1497{
1498  /* We fetch characters from PATTERN here.  Even though PATTERN is
1499     `char *' (i.e., signed), we declare these variables as unsigned, so
1500     they can be reliably used as array indices.  */
1501  register unsigned char c, c1;
1502 
1503  /* A random temporary spot in PATTERN.  */
1504  const char *p1;
1505
1506  /* Points to the end of the buffer, where we should append.  */
1507  register unsigned char *b;
1508 
1509  /* Keeps track of unclosed groups.  */
1510  compile_stack_type compile_stack;
1511
1512  /* Points to the current (ending) position in the pattern.  */
1513  const char *p = pattern;
1514  const char *pend = pattern + size;
1515 
1516  /* How to translate the characters in the pattern.  */
1517  char *translate = bufp->translate;
1518
1519  /* Address of the count-byte of the most recently inserted `exactn'
1520     command.  This makes it possible to tell if a new exact-match
1521     character can be added to that command or if the character requires
1522     a new `exactn' command.  */
1523  unsigned char *pending_exact = 0;
1524
1525  /* Address of start of the most recently finished expression.
1526     This tells, e.g., postfix * where to find the start of its
1527     operand.  Reset at the beginning of groups and alternatives.  */
1528  unsigned char *laststart = 0;
1529
1530  /* Address of beginning of regexp, or inside of last group.  */
1531  unsigned char *begalt;
1532
1533  /* Place in the uncompiled pattern (i.e., the {) to
1534     which to go back if the interval is invalid.  */
1535  const char *beg_interval;
1536               
1537  /* Address of the place where a forward jump should go to the end of
1538     the containing expression.  Each alternative of an `or' -- except the
1539     last -- ends with a forward jump of this sort.  */
1540  unsigned char *fixup_alt_jump = 0;
1541
1542  /* Counts open-groups as they are encountered.  Remembered for the
1543     matching close-group on the compile stack, so the same register
1544     number is put in the stop_memory as the start_memory.  */
1545  regnum_t regnum = 0;
1546
1547#ifdef DEBUG
1548  DEBUG_PRINT1 ("\nCompiling pattern: ");
1549  if (debug)
1550    {
1551      unsigned debug_count;
1552     
1553      for (debug_count = 0; debug_count < size; debug_count++)
1554        printchar (pattern[debug_count]);
1555      putchar ('\n');
1556    }
1557#endif /* DEBUG */
1558
1559  /* Initialize the compile stack.  */
1560  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1561  if (compile_stack.stack == NULL)
1562    return REG_ESPACE;
1563
1564  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1565  compile_stack.avail = 0;
1566
1567  /* Initialize the pattern buffer.  */
1568  bufp->syntax = syntax;
1569  bufp->fastmap_accurate = 0;
1570  bufp->not_bol = bufp->not_eol = 0;
1571
1572  /* Set `used' to zero, so that if we return an error, the pattern
1573     printer (for debugging) will think there's no pattern.  We reset it
1574     at the end.  */
1575  bufp->used = 0;
1576 
1577  /* Always count groups, whether or not bufp->no_sub is set.  */
1578  bufp->re_nsub = 0;                           
1579
1580#if !defined (emacs) && !defined (SYNTAX_TABLE)
1581  /* Initialize the syntax table.  */
1582   init_syntax_once ();
1583#endif
1584
1585  if (bufp->allocated == 0)
1586    {
1587      if (bufp->buffer)
1588        { /* If zero allocated, but buffer is non-null, try to realloc
1589             enough space.  This loses if buffer's address is bogus, but
1590             that is the user's responsibility.  */
1591          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1592        }
1593      else
1594        { /* Caller did not allocate a buffer.  Do it for them.  */
1595          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1596        }
1597      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1598
1599      bufp->allocated = INIT_BUF_SIZE;
1600    }
1601
1602  begalt = b = bufp->buffer;
1603
1604  /* Loop through the uncompiled pattern until we're at the end.  */
1605  while (p != pend)
1606    {
1607      PATFETCH (c);
1608
1609      switch (c)
1610        {
1611        case '^':
1612          {
1613            if (   /* If at start of pattern, it's an operator.  */
1614                   p == pattern + 1
1615                   /* If context independent, it's an operator.  */
1616                || syntax & RE_CONTEXT_INDEP_ANCHORS
1617                   /* Otherwise, depends on what's come before.  */
1618                || at_begline_loc_p (pattern, p, syntax))
1619              BUF_PUSH (begline);
1620            else
1621              goto normal_char;
1622          }
1623          break;
1624
1625
1626        case '$':
1627          {
1628            if (   /* If at end of pattern, it's an operator.  */
1629                   p == pend
1630                   /* If context independent, it's an operator.  */
1631                || syntax & RE_CONTEXT_INDEP_ANCHORS
1632                   /* Otherwise, depends on what's next.  */
1633                || at_endline_loc_p (p, pend, syntax))
1634               BUF_PUSH (endline);
1635             else
1636               goto normal_char;
1637           }
1638           break;
1639
1640
1641        case '+':
1642        case '?':
1643          if ((syntax & RE_BK_PLUS_QM)
1644              || (syntax & RE_LIMITED_OPS))
1645            goto normal_char;
1646        handle_plus:
1647        case '*':
1648          /* If there is no previous pattern... */
1649          if (!laststart)
1650            {
1651              if (syntax & RE_CONTEXT_INVALID_OPS)
1652                FREE_STACK_RETURN (REG_BADRPT);
1653              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1654                goto normal_char;
1655            }
1656
1657          {
1658            /* Are we optimizing this jump?  */
1659            boolean keep_string_p = false;
1660           
1661            /* 1 means zero (many) matches is allowed.  */
1662            char zero_times_ok = 0, many_times_ok = 0;
1663
1664            /* If there is a sequence of repetition chars, collapse it
1665               down to just one (the right one).  We can't combine
1666               interval operators with these because of, e.g., `a{2}*',
1667               which should only match an even number of `a's.  */
1668
1669            for (;;)
1670              {
1671                zero_times_ok |= c != '+';
1672                many_times_ok |= c != '?';
1673
1674                if (p == pend)
1675                  break;
1676
1677                PATFETCH (c);
1678
1679                if (c == '*'
1680                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1681                  ;
1682
1683                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1684                  {
1685                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1686
1687                    PATFETCH (c1);
1688                    if (!(c1 == '+' || c1 == '?'))
1689                      {
1690                        PATUNFETCH;
1691                        PATUNFETCH;
1692                        break;
1693                      }
1694
1695                    c = c1;
1696                  }
1697                else
1698                  {
1699                    PATUNFETCH;
1700                    break;
1701                  }
1702
1703                /* If we get here, we found another repeat character.  */
1704               }
1705
1706            /* Star, etc. applied to an empty pattern is equivalent
1707               to an empty pattern.  */
1708            if (!laststart) 
1709              break;
1710
1711            /* Now we know whether or not zero matches is allowed
1712               and also whether or not two or more matches is allowed.  */
1713            if (many_times_ok)
1714              { /* More than one repetition is allowed, so put in at the
1715                   end a backward relative jump from `b' to before the next
1716                   jump we're going to put in below (which jumps from
1717                   laststart to after this jump). 
1718
1719                   But if we are at the `*' in the exact sequence `.*\n',
1720                   insert an unconditional jump backwards to the .,
1721                   instead of the beginning of the loop.  This way we only
1722                   push a failure point once, instead of every time
1723                   through the loop.  */
1724                assert (p - 1 > pattern);
1725
1726                /* Allocate the space for the jump.  */
1727                GET_BUFFER_SPACE (3);
1728
1729                /* We know we are not at the first character of the pattern,
1730                   because laststart was nonzero.  And we've already
1731                   incremented `p', by the way, to be the character after
1732                   the `*'.  Do we have to do something analogous here
1733                   for null bytes, because of RE_DOT_NOT_NULL?  */
1734                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1735                    && zero_times_ok
1736                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1737                    && !(syntax & RE_DOT_NEWLINE))
1738                  { /* We have .*\n.  */
1739                    STORE_JUMP (jump, b, laststart);
1740                    keep_string_p = true;
1741                  }
1742                else
1743                  /* Anything else.  */
1744                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1745
1746                /* We've added more stuff to the buffer.  */
1747                b += 3;
1748              }
1749
1750            /* On failure, jump from laststart to b + 3, which will be the
1751               end of the buffer after this jump is inserted.  */
1752            GET_BUFFER_SPACE (3);
1753            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1754                                       : on_failure_jump,
1755                         laststart, b + 3);
1756            pending_exact = 0;
1757            b += 3;
1758
1759            if (!zero_times_ok)
1760              {
1761                /* At least one repetition is required, so insert a
1762                   `dummy_failure_jump' before the initial
1763                   `on_failure_jump' instruction of the loop. This
1764                   effects a skip over that instruction the first time
1765                   we hit that loop.  */
1766                GET_BUFFER_SPACE (3);
1767                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1768                b += 3;
1769              }
1770            }
1771          break;
1772
1773
1774        case '.':
1775          laststart = b;
1776          BUF_PUSH (anychar);
1777          break;
1778
1779
1780        case '[':
1781          {
1782            boolean had_char_class = false;
1783
1784            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1785
1786            /* Ensure that we have enough space to push a charset: the
1787               opcode, the length count, and the bitset; 34 bytes in all.  */
1788            GET_BUFFER_SPACE (34);
1789
1790            laststart = b;
1791
1792            /* We test `*p == '^' twice, instead of using an if
1793               statement, so we only need one BUF_PUSH.  */
1794            BUF_PUSH (*p == '^' ? charset_not : charset);
1795            if (*p == '^')
1796              p++;
1797
1798            /* Remember the first position in the bracket expression.  */
1799            p1 = p;
1800
1801            /* Push the number of bytes in the bitmap.  */
1802            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1803
1804            /* Clear the whole map.  */
1805            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1806
1807            /* charset_not matches newline according to a syntax bit.  */
1808            if ((re_opcode_t) b[-2] == charset_not
1809                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1810              SET_LIST_BIT ('\n');
1811
1812            /* Read in characters and ranges, setting map bits.  */
1813            for (;;)
1814              {
1815                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1816
1817                PATFETCH (c);
1818
1819                /* \ might escape characters inside [...] and [^...].  */
1820                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1821                  {
1822                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1823
1824                    PATFETCH (c1);
1825                    SET_LIST_BIT (c1);
1826                    continue;
1827                  }
1828
1829                /* Could be the end of the bracket expression.  If it's
1830                   not (i.e., when the bracket expression is `[]' so
1831                   far), the ']' character bit gets set way below.  */
1832                if (c == ']' && p != p1 + 1)
1833                  break;
1834
1835                /* Look ahead to see if it's a range when the last thing
1836                   was a character class.  */
1837                if (had_char_class && c == '-' && *p != ']')
1838                  FREE_STACK_RETURN (REG_ERANGE);
1839
1840                /* Look ahead to see if it's a range when the last thing
1841                   was a character: if this is a hyphen not at the
1842                   beginning or the end of a list, then it's the range
1843                   operator.  */
1844                if (c == '-'
1845                    && !(p - 2 >= pattern && p[-2] == '[')
1846                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1847                    && *p != ']')
1848                  {
1849                    reg_errcode_t ret
1850                      = compile_range (&p, pend, translate, syntax, b);
1851                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
1852                  }
1853
1854                else if (p[0] == '-' && p[1] != ']')
1855                  { /* This handles ranges made up of characters only.  */
1856                    reg_errcode_t ret;
1857
1858                    /* Move past the `-'.  */
1859                    PATFETCH (c1);
1860                   
1861                    ret = compile_range (&p, pend, translate, syntax, b);
1862                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
1863                  }
1864
1865                /* See if we're at the beginning of a possible character
1866                   class.  */
1867
1868                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1869                  { /* Leave room for the null.  */
1870                    char str[CHAR_CLASS_MAX_LENGTH + 1];
1871
1872                    PATFETCH (c);
1873                    c1 = 0;
1874
1875                    /* If pattern is `[[:'.  */
1876                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1877
1878                    for (;;)
1879                      {
1880                        PATFETCH (c);
1881                        if (c == ':' || c == ']' || p == pend
1882                            || c1 == CHAR_CLASS_MAX_LENGTH)
1883                          break;
1884                        str[c1++] = c;
1885                      }
1886                    str[c1] = '\0';
1887
1888                    /* If isn't a word bracketed by `[:' and:`]':
1889                       undo the ending character, the letters, and leave
1890                       the leading `:' and `[' (but set bits for them).  */
1891                    if (c == ':' && *p == ']')
1892                      {
1893                        int ch;
1894                        boolean is_alnum = STREQ (str, "alnum");
1895                        boolean is_alpha = STREQ (str, "alpha");
1896                        boolean is_blank = STREQ (str, "blank");
1897                        boolean is_cntrl = STREQ (str, "cntrl");
1898                        boolean is_digit = STREQ (str, "digit");
1899                        boolean is_graph = STREQ (str, "graph");
1900                        boolean is_lower = STREQ (str, "lower");
1901                        boolean is_print = STREQ (str, "print");
1902                        boolean is_punct = STREQ (str, "punct");
1903                        boolean is_space = STREQ (str, "space");
1904                        boolean is_upper = STREQ (str, "upper");
1905                        boolean is_xdigit = STREQ (str, "xdigit");
1906                       
1907                        if (!IS_CHAR_CLASS (str))
1908                          FREE_STACK_RETURN (REG_ECTYPE);
1909
1910                        /* Throw away the ] at the end of the character
1911                           class.  */
1912                        PATFETCH (c);                                   
1913
1914                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1915
1916                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1917                          {
1918                            /* This was split into 3 if's to
1919                               avoid an arbitrary limit in some compiler.  */
1920                            if (   (is_alnum  && ISALNUM (ch))
1921                                || (is_alpha  && ISALPHA (ch))
1922                                || (is_blank  && ISBLANK (ch))
1923                                || (is_cntrl  && ISCNTRL (ch)))
1924                              SET_LIST_BIT (ch);
1925                            if (   (is_digit  && ISDIGIT (ch))
1926                                || (is_graph  && ISGRAPH (ch))
1927                                || (is_lower  && ISLOWER (ch))
1928                                || (is_print  && ISPRINT (ch)))
1929                              SET_LIST_BIT (ch);
1930                            if (   (is_punct  && ISPUNCT (ch))
1931                                || (is_space  && ISSPACE (ch))
1932                                || (is_upper  && ISUPPER (ch))
1933                                || (is_xdigit && ISXDIGIT (ch)))
1934                              SET_LIST_BIT (ch);
1935                          }
1936                        had_char_class = true;
1937                      }
1938                    else
1939                      {
1940                        c1++;
1941                        while (c1--)   
1942                          PATUNFETCH;
1943                        SET_LIST_BIT ('[');
1944                        SET_LIST_BIT (':');
1945                        had_char_class = false;
1946                      }
1947                  }
1948                else
1949                  {
1950                    had_char_class = false;
1951                    SET_LIST_BIT (c);
1952                  }
1953              }
1954
1955            /* Discard any (non)matching list bytes that are all 0 at the
1956               end of the map.  Decrease the map-length byte too.  */
1957            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1958              b[-1]--;
1959            b += b[-1];
1960          }
1961          break;
1962
1963
1964        case '(':
1965          if (syntax & RE_NO_BK_PARENS)
1966            goto handle_open;
1967          else
1968            goto normal_char;
1969
1970
1971        case ')':
1972          if (syntax & RE_NO_BK_PARENS)
1973            goto handle_close;
1974          else
1975            goto normal_char;
1976
1977
1978        case '\n':
1979          if (syntax & RE_NEWLINE_ALT)
1980            goto handle_alt;
1981          else
1982            goto normal_char;
1983
1984
1985        case '|':
1986          if (syntax & RE_NO_BK_VBAR)
1987            goto handle_alt;
1988          else
1989            goto normal_char;
1990
1991
1992        case '{':
1993           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1994             goto handle_interval;
1995           else
1996             goto normal_char;
1997
1998
1999        case '\\':
2000          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2001
2002          /* Do not translate the character after the \, so that we can
2003             distinguish, e.g., \B from \b, even if we normally would
2004             translate, e.g., B to b.  */
2005          PATFETCH_RAW (c);
2006
2007          switch (c)
2008            {
2009            case '(':
2010              if (syntax & RE_NO_BK_PARENS)
2011                goto normal_backslash;
2012
2013            handle_open:
2014              bufp->re_nsub++;
2015              regnum++;
2016
2017              if (COMPILE_STACK_FULL)
2018                {
2019                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2020                            compile_stack_elt_t);
2021                  if (compile_stack.stack == NULL) return REG_ESPACE;
2022
2023                  compile_stack.size <<= 1;
2024                }
2025
2026              /* These are the values to restore when we hit end of this
2027                 group.  They are all relative offsets, so that if the
2028                 whole pattern moves because of realloc, they will still
2029                 be valid.  */
2030              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2031              COMPILE_STACK_TOP.fixup_alt_jump
2032                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2033              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2034              COMPILE_STACK_TOP.regnum = regnum;
2035
2036              /* We will eventually replace the 0 with the number of
2037                 groups inner to this one.  But do not push a
2038                 start_memory for groups beyond the last one we can
2039                 represent in the compiled pattern.  */
2040              if (regnum <= MAX_REGNUM)
2041                {
2042                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2043                  BUF_PUSH_3 (start_memory, regnum, 0);
2044                }
2045               
2046              compile_stack.avail++;
2047
2048              fixup_alt_jump = 0;
2049              laststart = 0;
2050              begalt = b;
2051              /* If we've reached MAX_REGNUM groups, then this open
2052                 won't actually generate any code, so we'll have to
2053                 clear pending_exact explicitly.  */
2054              pending_exact = 0;
2055              break;
2056
2057
2058            case ')':
2059              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2060
2061              if (COMPILE_STACK_EMPTY)
2062                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2063                  goto normal_backslash;
2064                else
2065                  FREE_STACK_RETURN (REG_ERPAREN);
2066
2067            handle_close:
2068              if (fixup_alt_jump)
2069                { /* Push a dummy failure point at the end of the
2070                     alternative for a possible future
2071                     `pop_failure_jump' to pop.  See comments at
2072                     `push_dummy_failure' in `re_match_2'.  */
2073                  BUF_PUSH (push_dummy_failure);
2074                 
2075                  /* We allocated space for this jump when we assigned
2076                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2077                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2078                }
2079
2080              /* See similar code for backslashed left paren above.  */
2081              if (COMPILE_STACK_EMPTY)
2082                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2083                  goto normal_char;
2084                else
2085                  FREE_STACK_RETURN (REG_ERPAREN);
2086
2087              /* Since we just checked for an empty stack above, this
2088                 ``can't happen''.  */
2089              assert (compile_stack.avail != 0);
2090              {
2091                /* We don't just want to restore into `regnum', because
2092                   later groups should continue to be numbered higher,
2093                   as in `(ab)c(de)' -- the second group is #2.  */
2094                regnum_t this_group_regnum;
2095
2096                compile_stack.avail--;         
2097                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2098                fixup_alt_jump
2099                  = COMPILE_STACK_TOP.fixup_alt_jump
2100                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2101                    : 0;
2102                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2103                this_group_regnum = COMPILE_STACK_TOP.regnum;
2104                /* If we've reached MAX_REGNUM groups, then this open
2105                   won't actually generate any code, so we'll have to
2106                   clear pending_exact explicitly.  */
2107                pending_exact = 0;
2108
2109                /* We're at the end of the group, so now we know how many
2110                   groups were inside this one.  */
2111                if (this_group_regnum <= MAX_REGNUM)
2112                  {
2113                    unsigned char *inner_group_loc
2114                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2115                   
2116                    *inner_group_loc = regnum - this_group_regnum;
2117                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2118                                regnum - this_group_regnum);
2119                  }
2120              }
2121              break;
2122
2123
2124            case '|':                                   /* `\|'.  */
2125              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2126                goto normal_backslash;
2127            handle_alt:
2128              if (syntax & RE_LIMITED_OPS)
2129                goto normal_char;
2130
2131              /* Insert before the previous alternative a jump which
2132                 jumps to this alternative if the former fails.  */
2133              GET_BUFFER_SPACE (3);
2134              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2135              pending_exact = 0;
2136              b += 3;
2137
2138              /* The alternative before this one has a jump after it
2139                 which gets executed if it gets matched.  Adjust that
2140                 jump so it will jump to this alternative's analogous
2141                 jump (put in below, which in turn will jump to the next
2142                 (if any) alternative's such jump, etc.).  The last such
2143                 jump jumps to the correct final destination.  A picture:
2144                          _____ _____
2145                          |   | |   |   
2146                          |   v |   v
2147                         a | b   | c   
2148
2149                 If we are at `b', then fixup_alt_jump right now points to a
2150                 three-byte space after `a'.  We'll put in the jump, set
2151                 fixup_alt_jump to right after `b', and leave behind three
2152                 bytes which we'll fill in when we get to after `c'.  */
2153
2154              if (fixup_alt_jump)
2155                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2156
2157              /* Mark and leave space for a jump after this alternative,
2158                 to be filled in later either by next alternative or
2159                 when know we're at the end of a series of alternatives.  */
2160              fixup_alt_jump = b;
2161              GET_BUFFER_SPACE (3);
2162              b += 3;
2163
2164              laststart = 0;
2165              begalt = b;
2166              break;
2167
2168
2169            case '{':
2170              /* If \{ is a literal.  */
2171              if (!(syntax & RE_INTERVALS)
2172                     /* If we're at `\{' and it's not the open-interval
2173                        operator.  */
2174                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2175                  || (p - 2 == pattern  &&  p == pend))
2176                goto normal_backslash;
2177
2178            handle_interval:
2179              {
2180                /* If got here, then the syntax allows intervals.  */
2181
2182                /* At least (most) this many matches must be made.  */
2183                int lower_bound = -1, upper_bound = -1;
2184
2185                beg_interval = p - 1;
2186
2187                if (p == pend)
2188                  {
2189                    if (syntax & RE_NO_BK_BRACES)
2190                      goto unfetch_interval;
2191                    else
2192                      FREE_STACK_RETURN (REG_EBRACE);
2193                  }
2194
2195                GET_UNSIGNED_NUMBER (lower_bound);
2196
2197                if (c == ',')
2198                  {
2199                    GET_UNSIGNED_NUMBER (upper_bound);
2200                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2201                  }
2202                else
2203                  /* Interval such as `{1}' => match exactly once. */
2204                  upper_bound = lower_bound;
2205
2206                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2207                    || lower_bound > upper_bound)
2208                  {
2209                    if (syntax & RE_NO_BK_BRACES)
2210                      goto unfetch_interval;
2211                    else
2212                      FREE_STACK_RETURN (REG_BADBR);
2213                  }
2214
2215                if (!(syntax & RE_NO_BK_BRACES))
2216                  {
2217                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2218
2219                    PATFETCH (c);
2220                  }
2221
2222                if (c != '}')
2223                  {
2224                    if (syntax & RE_NO_BK_BRACES)
2225                      goto unfetch_interval;
2226                    else
2227                      FREE_STACK_RETURN (REG_BADBR);
2228                  }
2229
2230                /* We just parsed a valid interval.  */
2231
2232                /* If it's invalid to have no preceding re.  */
2233                if (!laststart)
2234                  {
2235                    if (syntax & RE_CONTEXT_INVALID_OPS)
2236                      FREE_STACK_RETURN (REG_BADRPT);
2237                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2238                      laststart = b;
2239                    else
2240                      goto unfetch_interval;
2241                  }
2242
2243                /* If the upper bound is zero, don't want to succeed at
2244                   all; jump from `laststart' to `b + 3', which will be
2245                   the end of the buffer after we insert the jump.  */
2246                 if (upper_bound == 0)
2247                   {
2248                     GET_BUFFER_SPACE (3);
2249                     INSERT_JUMP (jump, laststart, b + 3);
2250                     b += 3;
2251                   }
2252
2253                 /* Otherwise, we have a nontrivial interval.  When
2254                    we're all done, the pattern will look like:
2255                      set_number_at <jump count> <upper bound>
2256                      set_number_at <succeed_n count> <lower bound>
2257                      succeed_n <after jump addr> <succeed_n count>
2258                      <body of loop>
2259                      jump_n <succeed_n addr> <jump count>
2260                    (The upper bound and `jump_n' are omitted if
2261                    `upper_bound' is 1, though.)  */
2262                 else
2263                   { /* If the upper bound is > 1, we need to insert
2264                        more at the end of the loop.  */
2265                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2266
2267                     GET_BUFFER_SPACE (nbytes);
2268
2269                     /* Initialize lower bound of the `succeed_n', even
2270                        though it will be set during matching by its
2271                        attendant `set_number_at' (inserted next),
2272                        because `re_compile_fastmap' needs to know.
2273                        Jump to the `jump_n' we might insert below.  */
2274                     INSERT_JUMP2 (succeed_n, laststart,
2275                                   b + 5 + (upper_bound > 1) * 5,
2276                                   lower_bound);
2277                     b += 5;
2278
2279                     /* Code to initialize the lower bound.  Insert
2280                        before the `succeed_n'.  The `5' is the last two
2281                        bytes of this `set_number_at', plus 3 bytes of
2282                        the following `succeed_n'.  */
2283                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2284                     b += 5;
2285
2286                     if (upper_bound > 1)
2287                       { /* More than one repetition is allowed, so
2288                            append a backward jump to the `succeed_n'
2289                            that starts this interval.
2290                           
2291                            When we've reached this during matching,
2292                            we'll have matched the interval once, so
2293                            jump back only `upper_bound - 1' times.  */
2294                         STORE_JUMP2 (jump_n, b, laststart + 5,
2295                                      upper_bound - 1);
2296                         b += 5;
2297
2298                         /* The location we want to set is the second
2299                            parameter of the `jump_n'; that is `b-2' as
2300                            an absolute address.  `laststart' will be
2301                            the `set_number_at' we're about to insert;
2302                            `laststart+3' the number to set, the source
2303                            for the relative address.  But we are
2304                            inserting into the middle of the pattern --
2305                            so everything is getting moved up by 5.
2306                            Conclusion: (b - 2) - (laststart + 3) + 5,
2307                            i.e., b - laststart.
2308                           
2309                            We insert this at the beginning of the loop
2310                            so that if we fail during matching, we'll
2311                            reinitialize the bounds.  */
2312                         insert_op2 (set_number_at, laststart, b - laststart,
2313                                     upper_bound - 1, b);
2314                         b += 5;
2315                       }
2316                   }
2317                pending_exact = 0;
2318                beg_interval = NULL;
2319              }
2320              break;
2321
2322            unfetch_interval:
2323              /* If an invalid interval, match the characters as literals.  */
2324               assert (beg_interval);
2325               p = beg_interval;
2326               beg_interval = NULL;
2327
2328               /* normal_char and normal_backslash need `c'.  */
2329               PATFETCH (c);   
2330
2331               if (!(syntax & RE_NO_BK_BRACES))
2332                 {
2333                   if (p > pattern  &&  p[-1] == '\\')
2334                     goto normal_backslash;
2335                 }
2336               goto normal_char;
2337
2338#ifdef emacs
2339            /* There is no way to specify the before_dot and after_dot
2340               operators.  rms says this is ok.  --karl  */
2341            case '=':
2342              BUF_PUSH (at_dot);
2343              break;
2344
2345            case 's':   
2346              laststart = b;
2347              PATFETCH (c);
2348              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2349              break;
2350
2351            case 'S':
2352              laststart = b;
2353              PATFETCH (c);
2354              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2355              break;
2356#endif /* emacs */
2357
2358
2359            case 'w':
2360              laststart = b;
2361              BUF_PUSH (wordchar);
2362              break;
2363
2364
2365            case 'W':
2366              laststart = b;
2367              BUF_PUSH (notwordchar);
2368              break;
2369
2370
2371            case '<':
2372              BUF_PUSH (wordbeg);
2373              break;
2374
2375            case '>':
2376              BUF_PUSH (wordend);
2377              break;
2378
2379            case 'b':
2380              BUF_PUSH (wordbound);
2381              break;
2382
2383            case 'B':
2384              BUF_PUSH (notwordbound);
2385              break;
2386
2387            case '`':
2388              BUF_PUSH (begbuf);
2389              break;
2390
2391            case '\'':
2392              BUF_PUSH (endbuf);
2393              break;
2394
2395            case '1': case '2': case '3': case '4': case '5':
2396            case '6': case '7': case '8': case '9':
2397              if (syntax & RE_NO_BK_REFS)
2398                goto normal_char;
2399
2400              c1 = c - '0';
2401
2402              if (c1 > regnum)
2403                FREE_STACK_RETURN (REG_ESUBREG);
2404
2405              /* Can't back reference to a subexpression if inside of it.  */
2406              if (group_in_compile_stack (compile_stack, c1))
2407                goto normal_char;
2408
2409              laststart = b;
2410              BUF_PUSH_2 (duplicate, c1);
2411              break;
2412
2413
2414            case '+':
2415            case '?':
2416              if (syntax & RE_BK_PLUS_QM)
2417                goto handle_plus;
2418              else
2419                goto normal_backslash;
2420
2421            default:
2422            normal_backslash:
2423              /* You might think it would be useful for \ to mean
2424                 not to translate; but if we don't translate it
2425                 it will never match anything.  */
2426              c = TRANSLATE (c);
2427              goto normal_char;
2428            }
2429          break;
2430
2431
2432        default:
2433        /* Expects the character in `c'.  */
2434        normal_char:
2435              /* If no exactn currently being built.  */
2436          if (!pending_exact
2437
2438              /* If last exactn not at current position.  */
2439              || pending_exact + *pending_exact + 1 != b
2440             
2441              /* We have only one byte following the exactn for the count.  */
2442              || *pending_exact == (1 << BYTEWIDTH) - 1
2443
2444              /* If followed by a repetition operator.  */
2445              || *p == '*' || *p == '^'
2446              || ((syntax & RE_BK_PLUS_QM)
2447                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2448                  : (*p == '+' || *p == '?'))
2449              || ((syntax & RE_INTERVALS)
2450                  && ((syntax & RE_NO_BK_BRACES)
2451                      ? *p == '{'
2452                      : (p[0] == '\\' && p[1] == '{'))))
2453            {
2454              /* Start building a new exactn.  */
2455             
2456              laststart = b;
2457
2458              BUF_PUSH_2 (exactn, 0);
2459              pending_exact = b - 1;
2460            }
2461           
2462          BUF_PUSH (c);
2463          (*pending_exact)++;
2464          break;
2465        } /* switch (c) */
2466    } /* while p != pend */
2467
2468 
2469  /* Through the pattern now.  */
2470 
2471  if (fixup_alt_jump)
2472    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2473
2474  if (!COMPILE_STACK_EMPTY)
2475    FREE_STACK_RETURN (REG_EPAREN);
2476
2477  free (compile_stack.stack);
2478
2479  /* We have succeeded; set the length of the buffer.  */
2480  bufp->used = b - bufp->buffer;
2481
2482#ifdef DEBUG
2483  if (debug)
2484    {
2485      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2486      print_compiled_pattern (bufp);
2487    }
2488#endif /* DEBUG */
2489
2490#ifndef MATCH_MAY_ALLOCATE
2491  /* Initialize the failure stack to the largest possible stack.  This
2492     isn't necessary unless we're trying to avoid calling alloca in
2493     the search and match routines.  */
2494  {
2495    int num_regs = bufp->re_nsub + 1;
2496
2497    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2498       is strictly greater than re_max_failures, the largest possible stack
2499       is 2 * re_max_failures failure points.  */
2500    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2501      {
2502        fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2503
2504#ifdef emacs
2505        if (! fail_stack.stack)
2506          fail_stack.stack
2507            = (fail_stack_elt_t *) xmalloc (fail_stack.size
2508                                            * sizeof (fail_stack_elt_t));
2509        else
2510          fail_stack.stack
2511            = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2512                                             (fail_stack.size
2513                                              * sizeof (fail_stack_elt_t)));
2514#else /* not emacs */
2515        if (! fail_stack.stack)
2516          fail_stack.stack
2517            = (fail_stack_elt_t *) malloc (fail_stack.size
2518                                           * sizeof (fail_stack_elt_t));
2519        else
2520          fail_stack.stack
2521            = (fail_stack_elt_t *) realloc (fail_stack.stack,
2522                                            (fail_stack.size
2523                                             * sizeof (fail_stack_elt_t)));
2524#endif /* not emacs */
2525      }
2526
2527    /* Initialize some other variables the matcher uses.  */
2528    RETALLOC_IF (regstart,       num_regs, const char *);
2529    RETALLOC_IF (regend,         num_regs, const char *);
2530    RETALLOC_IF (old_regstart,   num_regs, const char *);
2531    RETALLOC_IF (old_regend,     num_regs, const char *);
2532    RETALLOC_IF (best_regstart,  num_regs, const char *);
2533    RETALLOC_IF (best_regend,    num_regs, const char *);
2534    RETALLOC_IF (reg_info,       num_regs, register_info_type);
2535    RETALLOC_IF (reg_dummy,      num_regs, const char *);
2536    RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
2537  }
2538#endif
2539
2540  return REG_NOERROR;
2541} /* regex_compile */
2542
2543/* Subroutines for `regex_compile'.  */
2544
2545/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2546
2547static void
2548store_op1 (op, loc, arg)
2549    re_opcode_t op;
2550    unsigned char *loc;
2551    int arg;
2552{
2553  *loc = (unsigned char) op;
2554  STORE_NUMBER (loc + 1, arg);
2555}
2556
2557
2558/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2559
2560static void
2561store_op2 (op, loc, arg1, arg2)
2562    re_opcode_t op;
2563    unsigned char *loc;
2564    int arg1, arg2;
2565{
2566  *loc = (unsigned char) op;
2567  STORE_NUMBER (loc + 1, arg1);
2568  STORE_NUMBER (loc + 3, arg2);
2569}
2570
2571
2572/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2573   for OP followed by two-byte integer parameter ARG.  */
2574
2575static void
2576insert_op1 (op, loc, arg, end)
2577    re_opcode_t op;
2578    unsigned char *loc;
2579    int arg;
2580    unsigned char *end;   
2581{
2582  register unsigned char *pfrom = end;
2583  register unsigned char *pto = end + 3;
2584
2585  while (pfrom != loc)
2586    *--pto = *--pfrom;
2587   
2588  store_op1 (op, loc, arg);
2589}
2590
2591
2592/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2593
2594static void
2595insert_op2 (op, loc, arg1, arg2, end)
2596    re_opcode_t op;
2597    unsigned char *loc;
2598    int arg1, arg2;
2599    unsigned char *end;   
2600{
2601  register unsigned char *pfrom = end;
2602  register unsigned char *pto = end + 5;
2603
2604  while (pfrom != loc)
2605    *--pto = *--pfrom;
2606   
2607  store_op2 (op, loc, arg1, arg2);
2608}
2609
2610
2611/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2612   after an alternative or a begin-subexpression.  We assume there is at
2613   least one character before the ^.  */
2614
2615static boolean
2616at_begline_loc_p (pattern, p, syntax)
2617    const char *pattern, *p;
2618    reg_syntax_t syntax;
2619{
2620  const char *prev = p - 2;
2621  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2622 
2623  return
2624       /* After a subexpression?  */
2625       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2626       /* After an alternative?  */
2627    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2628}
2629
2630
2631/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2632   at least one character after the $, i.e., `P < PEND'.  */
2633
2634static boolean
2635at_endline_loc_p (p, pend, syntax)
2636    const char *p, *pend;
2637    int syntax;
2638{
2639  const char *next = p;
2640  boolean next_backslash = *next == '\\';
2641  const char *next_next = p + 1 < pend ? p + 1 : NULL;
2642 
2643  return
2644       /* Before a subexpression?  */
2645       (syntax & RE_NO_BK_PARENS ? *next == ')'
2646        : next_backslash && next_next && *next_next == ')')
2647       /* Before an alternative?  */
2648    || (syntax & RE_NO_BK_VBAR ? *next == '|'
2649        : next_backslash && next_next && *next_next == '|');
2650}
2651
2652
2653/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2654   false if it's not.  */
2655
2656static boolean
2657group_in_compile_stack (compile_stack, regnum)
2658    compile_stack_type compile_stack;
2659    regnum_t regnum;
2660{
2661  int this_element;
2662
2663  for (this_element = compile_stack.avail - 1; 
2664       this_element >= 0;
2665       this_element--)
2666    if (compile_stack.stack[this_element].regnum == regnum)
2667      return true;
2668
2669  return false;
2670}
2671
2672
2673/* Read the ending character of a range (in a bracket expression) from the
2674   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2675   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2676   Then we set the translation of all bits between the starting and
2677   ending characters (inclusive) in the compiled pattern B.
2678   
2679   Return an error code.
2680   
2681   We use these short variable names so we can use the same macros as
2682   `regex_compile' itself.  */
2683
2684static reg_errcode_t
2685compile_range (p_ptr, pend, translate, syntax, b)
2686    const char **p_ptr, *pend;
2687    char *translate;
2688    reg_syntax_t syntax;
2689    unsigned char *b;
2690{
2691  unsigned this_char;
2692
2693  const char *p = *p_ptr;
2694  int range_start, range_end;
2695 
2696  if (p == pend)
2697    return REG_ERANGE;
2698
2699  /* Even though the pattern is a signed `char *', we need to fetch
2700     with unsigned char *'s; if the high bit of the pattern character
2701     is set, the range endpoints will be negative if we fetch using a
2702     signed char *.
2703
2704     We also want to fetch the endpoints without translating them; the
2705     appropriate translation is done in the bit-setting loop below.  */
2706  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
2707  range_start = ((const unsigned char *) p)[-2];
2708  range_end   = ((const unsigned char *) p)[0];
2709
2710  /* Have to increment the pointer into the pattern string, so the
2711     caller isn't still at the ending character.  */
2712  (*p_ptr)++;
2713
2714  /* If the start is after the end, the range is empty.  */
2715  if (range_start > range_end)
2716    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2717
2718  /* Here we see why `this_char' has to be larger than an `unsigned
2719     char' -- the range is inclusive, so if `range_end' == 0xff
2720     (assuming 8-bit characters), we would otherwise go into an infinite
2721     loop, since all characters <= 0xff.  */
2722  for (this_char = range_start; this_char <= range_end; this_char++)
2723    {
2724      SET_LIST_BIT (TRANSLATE (this_char));
2725    }
2726 
2727  return REG_NOERROR;
2728}
2729
2730/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2731   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2732   characters can start a string that matches the pattern.  This fastmap
2733   is used by re_search to skip quickly over impossible starting points.
2734
2735   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2736   area as BUFP->fastmap.
2737   
2738   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2739   the pattern buffer.
2740
2741   Returns 0 if we succeed, -2 if an internal error.   */
2742
2743int
2744re_compile_fastmap (bufp)
2745     struct re_pattern_buffer *bufp;
2746{
2747  int j, k;
2748#ifdef MATCH_MAY_ALLOCATE
2749  fail_stack_type fail_stack;
2750#endif
2751#ifndef REGEX_MALLOC
2752  char *destination;
2753#endif
2754  /* We don't push any register information onto the failure stack.  */
2755  unsigned num_regs = 0;
2756 
2757  register char *fastmap = bufp->fastmap;
2758  unsigned char *pattern = bufp->buffer;
2759  unsigned long size = bufp->used;
2760  unsigned char *p = pattern;
2761  register unsigned char *pend = pattern + size;
2762
2763  /* Assume that each path through the pattern can be null until
2764     proven otherwise.  We set this false at the bottom of switch
2765     statement, to which we get only if a particular path doesn't
2766     match the empty string.  */
2767  boolean path_can_be_null = true;
2768
2769  /* We aren't doing a `succeed_n' to begin with.  */
2770  boolean succeed_n_p = false;
2771
2772  assert (fastmap != NULL && p != NULL);
2773 
2774  INIT_FAIL_STACK ();
2775  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2776  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
2777  bufp->can_be_null = 0;
2778     
2779  while (p != pend || !FAIL_STACK_EMPTY ())
2780    {
2781      if (p == pend)
2782        {
2783          bufp->can_be_null |= path_can_be_null;
2784         
2785          /* Reset for next path.  */
2786          path_can_be_null = true;
2787         
2788          p = fail_stack.stack[--fail_stack.avail];
2789        }
2790
2791      /* We should never be about to go beyond the end of the pattern.  */
2792      assert (p < pend);
2793     
2794#ifdef SWITCH_ENUM_BUG
2795      switch ((int) ((re_opcode_t) *p++))
2796#else
2797      switch ((re_opcode_t) *p++)
2798#endif
2799        {
2800
2801        /* I guess the idea here is to simply not bother with a fastmap
2802           if a backreference is used, since it's too hard to figure out
2803           the fastmap for the corresponding group.  Setting
2804           `can_be_null' stops `re_search_2' from using the fastmap, so
2805           that is all we do.  */
2806        case duplicate:
2807          bufp->can_be_null = 1;
2808          return 0;
2809
2810
2811      /* Following are the cases which match a character.  These end
2812         with `break'.  */
2813
2814        case exactn:
2815          fastmap[p[1]] = 1;
2816          break;
2817
2818
2819        case charset:
2820          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2821            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2822              fastmap[j] = 1;
2823          break;
2824
2825
2826        case charset_not:
2827          /* Chars beyond end of map must be allowed.  */
2828          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2829            fastmap[j] = 1;
2830
2831          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2832            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2833              fastmap[j] = 1;
2834          break;
2835
2836
2837        case wordchar:
2838          for (j = 0; j < (1 << BYTEWIDTH); j++)
2839            if (SYNTAX (j) == Sword)
2840              fastmap[j] = 1;
2841          break;
2842
2843
2844        case notwordchar:
2845          for (j = 0; j < (1 << BYTEWIDTH); j++)
2846            if (SYNTAX (j) != Sword)
2847              fastmap[j] = 1;
2848          break;
2849
2850
2851        case anychar:
2852          {
2853            int fastmap_newline = fastmap['\n'];
2854
2855            /* `.' matches anything ...  */
2856            for (j = 0; j < (1 << BYTEWIDTH); j++)
2857              fastmap[j] = 1;
2858
2859            /* ... except perhaps newline.  */
2860            if (!(bufp->syntax & RE_DOT_NEWLINE))
2861              fastmap['\n'] = fastmap_newline;
2862
2863            /* Return if we have already set `can_be_null'; if we have,
2864               then the fastmap is irrelevant.  Something's wrong here.  */
2865            else if (bufp->can_be_null)
2866              return 0;
2867
2868            /* Otherwise, have to check alternative paths.  */
2869            break;
2870          }
2871
2872#ifdef emacs
2873        case syntaxspec:
2874          k = *p++;
2875          for (j = 0; j < (1 << BYTEWIDTH); j++)
2876            if (SYNTAX (j) == (enum syntaxcode) k)
2877              fastmap[j] = 1;
2878          break;
2879
2880
2881        case notsyntaxspec:
2882          k = *p++;
2883          for (j = 0; j < (1 << BYTEWIDTH); j++)
2884            if (SYNTAX (j) != (enum syntaxcode) k)
2885              fastmap[j] = 1;
2886          break;
2887
2888
2889      /* All cases after this match the empty string.  These end with
2890         `continue'.  */
2891
2892
2893        case before_dot:
2894        case at_dot:
2895        case after_dot:
2896          continue;
2897#endif /* not emacs */
2898
2899
2900        case no_op:
2901        case begline:
2902        case endline:
2903        case begbuf:
2904        case endbuf:
2905        case wordbound:
2906        case notwordbound:
2907        case wordbeg:
2908        case wordend:
2909        case push_dummy_failure:
2910          continue;
2911
2912
2913        case jump_n:
2914        case pop_failure_jump:
2915        case maybe_pop_jump:
2916        case jump:
2917        case jump_past_alt:
2918        case dummy_failure_jump:
2919          EXTRACT_NUMBER_AND_INCR (j, p);
2920          p += j;       
2921          if (j > 0)
2922            continue;
2923           
2924          /* Jump backward implies we just went through the body of a
2925             loop and matched nothing.  Opcode jumped to should be
2926             `on_failure_jump' or `succeed_n'.  Just treat it like an
2927             ordinary jump.  For a * loop, it has pushed its failure
2928             point already; if so, discard that as redundant.  */
2929          if ((re_opcode_t) *p != on_failure_jump
2930              && (re_opcode_t) *p != succeed_n)
2931            continue;
2932
2933          p++;
2934          EXTRACT_NUMBER_AND_INCR (j, p);
2935          p += j;               
2936         
2937          /* If what's on the stack is where we are now, pop it.  */
2938          if (!FAIL_STACK_EMPTY ()
2939              && fail_stack.stack[fail_stack.avail - 1] == p)
2940            fail_stack.avail--;
2941
2942          continue;
2943
2944
2945        case on_failure_jump:
2946        case on_failure_keep_string_jump:
2947        handle_on_failure_jump:
2948          EXTRACT_NUMBER_AND_INCR (j, p);
2949
2950          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2951             end of the pattern.  We don't want to push such a point,
2952             since when we restore it above, entering the switch will
2953             increment `p' past the end of the pattern.  We don't need
2954             to push such a point since we obviously won't find any more
2955             fastmap entries beyond `pend'.  Such a pattern can match
2956             the null string, though.  */
2957          if (p + j < pend)
2958            {
2959              if (!PUSH_PATTERN_OP (p + j, fail_stack))
2960                return -2;
2961            }
2962          else
2963            bufp->can_be_null = 1;
2964
2965          if (succeed_n_p)
2966            {
2967              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
2968              succeed_n_p = false;
2969            }
2970
2971          continue;
2972
2973
2974        case succeed_n:
2975          /* Get to the number of times to succeed.  */
2976          p += 2;               
2977
2978          /* Increment p past the n for when k != 0.  */
2979          EXTRACT_NUMBER_AND_INCR (k, p);
2980          if (k == 0)
2981            {
2982              p -= 4;
2983              succeed_n_p = true;  /* Spaghetti code alert.  */
2984              goto handle_on_failure_jump;
2985            }
2986          continue;
2987
2988
2989        case set_number_at:
2990          p += 4;
2991          continue;
2992
2993
2994        case start_memory:
2995        case stop_memory:
2996          p += 2;
2997          continue;
2998
2999
3000        default:
3001          abort (); /* We have listed all the cases.  */
3002        } /* switch *p++ */
3003
3004      /* Getting here means we have found the possible starting
3005         characters for one path of the pattern -- and that the empty
3006         string does not match.  We need not follow this path further.
3007         Instead, look at the next alternative (remembered on the
3008         stack), or quit if no more.  The test at the top of the loop
3009         does these things.  */
3010      path_can_be_null = false;
3011      p = pend;
3012    } /* while p */
3013
3014  /* Set `can_be_null' for the last path (also the first path, if the
3015     pattern is empty).  */
3016  bufp->can_be_null |= path_can_be_null;
3017  return 0;
3018} /* re_compile_fastmap */
3019
3020/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3021   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3022   this memory for recording register information.  STARTS and ENDS
3023   must be allocated using the malloc library routine, and must each
3024   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3025
3026   If NUM_REGS == 0, then subsequent matches should allocate their own
3027   register data.
3028
3029   Unless this function is called, the first search or match using
3030   PATTERN_BUFFER will allocate its own register data, without
3031   freeing the old data.  */
3032
3033void
3034re_set_registers (bufp, regs, num_regs, starts, ends)
3035    struct re_pattern_buffer *bufp;
3036    struct re_registers *regs;
3037    unsigned num_regs;
3038    regoff_t *starts, *ends;
3039{
3040  if (num_regs)
3041    {
3042      bufp->regs_allocated = REGS_REALLOCATE;
3043      regs->num_regs = num_regs;
3044      regs->start = starts;
3045      regs->end = ends;
3046    }
3047  else
3048    {
3049      bufp->regs_allocated = REGS_UNALLOCATED;
3050      regs->num_regs = 0;
3051      regs->start = regs->end = (regoff_t *) 0;
3052    }
3053}
3054
3055/* Searching routines.  */
3056
3057/* Like re_search_2, below, but only one string is specified, and
3058   doesn't let you say where to stop matching. */
3059
3060int
3061re_search (bufp, string, size, startpos, range, regs)
3062     struct re_pattern_buffer *bufp;
3063     const char *string;
3064     int size, startpos, range;
3065     struct re_registers *regs;
3066{
3067  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3068                      regs, size);
3069}
3070
3071
3072/* Using the compiled pattern in BUFP->buffer, first tries to match the
3073   virtual concatenation of STRING1 and STRING2, starting first at index
3074   STARTPOS, then at STARTPOS + 1, and so on.
3075   
3076   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3077   
3078   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3079   only at STARTPOS; in general, the last start tried is STARTPOS +
3080   RANGE.
3081   
3082   In REGS, return the indices of the virtual concatenation of STRING1
3083   and STRING2 that matched the entire BUFP->buffer and its contained
3084   subexpressions.
3085   
3086   Do not consider matching one past the index STOP in the virtual
3087   concatenation of STRING1 and STRING2.
3088
3089   We return either the position in the strings at which the match was
3090   found, -1 if no match, or -2 if error (such as failure
3091   stack overflow).  */
3092
3093int
3094re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3095     struct re_pattern_buffer *bufp;
3096     const char *string1, *string2;
3097     int size1, size2;
3098     int startpos;
3099     int range;
3100     struct re_registers *regs;
3101     int stop;
3102{
3103  int val;
3104  register char *fastmap = bufp->fastmap;
3105  register char *translate = bufp->translate;
3106  int total_size = size1 + size2;
3107  int endpos = startpos + range;
3108
3109  /* Check for out-of-range STARTPOS.  */
3110  if (startpos < 0 || startpos > total_size)
3111    return -1;
3112   
3113  /* Fix up RANGE if it might eventually take us outside
3114     the virtual concatenation of STRING1 and STRING2.  */
3115  if (endpos < -1)
3116    range = -1 - startpos;
3117  else if (endpos > total_size)
3118    range = total_size - startpos;
3119
3120  /* If the search isn't to be a backwards one, don't waste time in a
3121     search for a pattern that must be anchored.  */
3122  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3123    {
3124      if (startpos > 0)
3125        return -1;
3126      else
3127        range = 1;
3128    }
3129
3130  /* Update the fastmap now if not correct already.  */
3131  if (fastmap && !bufp->fastmap_accurate)
3132    if (re_compile_fastmap (bufp) == -2)
3133      return -2;
3134 
3135  /* Loop through the string, looking for a place to start matching.  */
3136  for (;;)
3137    {
3138      /* If a fastmap is supplied, skip quickly over characters that
3139         cannot be the start of a match.  If the pattern can match the
3140         null string, however, we don't need to skip characters; we want
3141         the first null string.  */
3142      if (fastmap && startpos < total_size && !bufp->can_be_null)
3143        {
3144          if (range > 0)        /* Searching forwards.  */
3145            {
3146              register const char *d;
3147              register int lim = 0;
3148              int irange = range;
3149
3150              if (startpos < size1 && startpos + range >= size1)
3151                lim = range - (size1 - startpos);
3152
3153              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3154   
3155              /* Written out as an if-else to avoid testing `translate'
3156                 inside the loop.  */
3157              if (translate)
3158                while (range > lim
3159                       && !fastmap[(unsigned char)
3160                                   translate[(unsigned char) *d++]])
3161                  range--;
3162              else
3163                while (range > lim && !fastmap[(unsigned char) *d++])
3164                  range--;
3165
3166              startpos += irange - range;
3167            }
3168          else                          /* Searching backwards.  */
3169            {
3170              register char c = (size1 == 0 || startpos >= size1
3171                                 ? string2[startpos - size1]
3172                                 : string1[startpos]);
3173
3174              if (!fastmap[(unsigned char) TRANSLATE (c)])
3175                goto advance;
3176            }
3177        }
3178
3179      /* If can't match the null string, and that's all we have left, fail.  */
3180      if (range >= 0 && startpos == total_size && fastmap
3181          && !bufp->can_be_null)
3182        return -1;
3183
3184      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3185                                 startpos, regs, stop);
3186#ifndef REGEX_MALLOC
3187#ifdef C_ALLOCA
3188      alloca (0);
3189#endif
3190#endif
3191
3192      if (val >= 0)
3193        return startpos;
3194       
3195      if (val == -2)
3196        return -2;
3197
3198    advance:
3199      if (!range)
3200        break;
3201      else if (range > 0)
3202        {
3203          range--;
3204          startpos++;
3205        }
3206      else
3207        {
3208          range++;
3209          startpos--;
3210        }
3211    }
3212  return -1;
3213} /* re_search_2 */
3214
3215/* Declarations and macros for re_match_2.  */
3216
3217static int bcmp_translate ();
3218static boolean alt_match_null_string_p (),
3219               common_op_match_null_string_p (),
3220               group_match_null_string_p ();
3221
3222/* This converts PTR, a pointer into one of the search strings `string1'
3223   and `string2' into an offset from the beginning of that string.  */
3224#define POINTER_TO_OFFSET(ptr)                  \
3225  (FIRST_STRING_P (ptr)                         \
3226   ? ((regoff_t) ((ptr) - string1))             \
3227   : ((regoff_t) ((ptr) - string2 + size1)))
3228
3229/* Macros for dealing with the split strings in re_match_2.  */
3230
3231#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3232
3233/* Call before fetching a character with *d.  This switches over to
3234   string2 if necessary.  */
3235#define PREFETCH()                                                      \
3236  while (d == dend)                                                     \
3237    {                                                                   \
3238      /* End of string2 => fail.  */                                    \
3239      if (dend == end_match_2)                                          \
3240        goto fail;                                                      \
3241      /* End of string1 => advance to string2.  */                      \
3242      d = string2;                                                      \
3243      dend = end_match_2;                                               \
3244    }
3245
3246
3247/* Test if at very beginning or at very end of the virtual concatenation
3248   of `string1' and `string2'.  If only one string, it's `string2'.  */
3249#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3250#define AT_STRINGS_END(d) ((d) == end2)
3251
3252
3253/* Test if D points to a character which is word-constituent.  We have
3254   two special cases to check for: if past the end of string1, look at
3255   the first character in string2; and if before the beginning of
3256   string2, look at the last character in string1.  */
3257#define WORDCHAR_P(d)                                                   \
3258  (SYNTAX ((d) == end1 ? *string2                                       \
3259           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3260   == Sword)
3261
3262/* Test if the character before D and the one at D differ with respect
3263   to being word-constituent.  */
3264#define AT_WORD_BOUNDARY(d)                                             \
3265  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3266   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3267
3268
3269/* Free everything we malloc.  */
3270#ifdef MATCH_MAY_ALLOCATE
3271#ifdef REGEX_MALLOC
3272#define FREE_VAR(var) if (var) free (var); var = NULL
3273#define FREE_VARIABLES()                                                \
3274  do {                                                                  \
3275    FREE_VAR (fail_stack.stack);                                        \
3276    FREE_VAR (regstart);                                                \
3277    FREE_VAR (regend);                                                  \
3278    FREE_VAR (old_regstart);                                            \
3279    FREE_VAR (old_regend);                                              \
3280    FREE_VAR (best_regstart);                                           \
3281    FREE_VAR (best_regend);                                             \
3282    FREE_VAR (reg_info);                                                \
3283    FREE_VAR (reg_dummy);                                               \
3284    FREE_VAR (reg_info_dummy);                                          \
3285  } while (0)
3286#else /* not REGEX_MALLOC */
3287/* This used to do alloca (0), but now we do that in the caller.  */
3288#define FREE_VARIABLES() /* Nothing */
3289#endif /* not REGEX_MALLOC */
3290#else
3291#define FREE_VARIABLES() /* Do nothing!  */
3292#endif /* not MATCH_MAY_ALLOCATE */
3293
3294/* These values must meet several constraints.  They must not be valid
3295   register values; since we have a limit of 255 registers (because
3296   we use only one byte in the pattern for the register number), we can
3297   use numbers larger than 255.  They must differ by 1, because of
3298   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3299   be larger than the value for the highest register, so we do not try
3300   to actually save any registers when none are active.  */
3301#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3302#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3303
3304/* Matching routines.  */
3305
3306#ifndef emacs   /* Emacs never uses this.  */
3307/* re_match is like re_match_2 except it takes only a single string.  */
3308
3309int
3310re_match (bufp, string, size, pos, regs)
3311     struct re_pattern_buffer *bufp;
3312     const char *string;
3313     int size, pos;
3314     struct re_registers *regs;
3315{
3316  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3317                                    pos, regs, size);
3318  alloca (0);
3319  return result;
3320}
3321#endif /* not emacs */
3322
3323
3324/* re_match_2 matches the compiled pattern in BUFP against the
3325   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3326   and SIZE2, respectively).  We start matching at POS, and stop
3327   matching at STOP.
3328   
3329   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3330   store offsets for the substring each group matched in REGS.  See the
3331   documentation for exactly how many groups we fill.
3332
3333   We return -1 if no match, -2 if an internal error (such as the
3334   failure stack overflowing).  Otherwise, we return the length of the
3335   matched substring.  */
3336
3337int
3338re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3339     struct re_pattern_buffer *bufp;
3340     const char *string1, *string2;
3341     int size1, size2;
3342     int pos;
3343     struct re_registers *regs;
3344     int stop;
3345{
3346  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3347                                    pos, regs, stop);
3348  alloca (0);
3349  return result;
3350}
3351
3352/* This is a separate function so that we can force an alloca cleanup
3353   afterwards.  */
3354static int
3355re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3356     struct re_pattern_buffer *bufp;
3357     const char *string1, *string2;
3358     int size1, size2;
3359     int pos;
3360     struct re_registers *regs;
3361     int stop;
3362{
3363  /* General temporaries.  */
3364  int mcnt;
3365  unsigned char *p1;
3366
3367  /* Just past the end of the corresponding string.  */
3368  const char *end1, *end2;
3369
3370  /* Pointers into string1 and string2, just past the last characters in
3371     each to consider matching.  */
3372  const char *end_match_1, *end_match_2;
3373
3374  /* Where we are in the data, and the end of the current string.  */
3375  const char *d, *dend;
3376 
3377  /* Where we are in the pattern, and the end of the pattern.  */
3378  unsigned char *p = bufp->buffer;
3379  register unsigned char *pend = p + bufp->used;
3380
3381  /* Mark the opcode just after a start_memory, so we can test for an
3382     empty subpattern when we get to the stop_memory.  */
3383  unsigned char *just_past_start_mem = 0;
3384
3385  /* We use this to map every character in the string.  */
3386  char *translate = bufp->translate;
3387
3388  /* Failure point stack.  Each place that can handle a failure further
3389     down the line pushes a failure point on this stack.  It consists of
3390     restart, regend, and reg_info for all registers corresponding to
3391     the subexpressions we're currently inside, plus the number of such
3392     registers, and, finally, two char *'s.  The first char * is where
3393     to resume scanning the pattern; the second one is where to resume
3394     scanning the strings.  If the latter is zero, the failure point is
3395     a ``dummy''; if a failure happens and the failure point is a dummy,
3396     it gets discarded and the next next one is tried.  */
3397#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3398  fail_stack_type fail_stack;
3399#endif
3400#ifdef DEBUG
3401  static unsigned failure_id = 0;
3402  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3403#endif
3404
3405  /* We fill all the registers internally, independent of what we
3406     return, for use in backreferences.  The number here includes
3407     an element for register zero.  */
3408  unsigned num_regs = bufp->re_nsub + 1;
3409 
3410  /* The currently active registers.  */
3411  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3412  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3413
3414  /* Information on the contents of registers. These are pointers into
3415     the input strings; they record just what was matched (on this
3416     attempt) by a subexpression part of the pattern, that is, the
3417     regnum-th regstart pointer points to where in the pattern we began
3418     matching and the regnum-th regend points to right after where we
3419     stopped matching the regnum-th subexpression.  (The zeroth register
3420     keeps track of what the whole pattern matches.)  */
3421#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3422  const char **regstart, **regend;
3423#endif
3424
3425  /* If a group that's operated upon by a repetition operator fails to
3426     match anything, then the register for its start will need to be
3427     restored because it will have been set to wherever in the string we
3428     are when we last see its open-group operator.  Similarly for a
3429     register's end.  */
3430#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3431  const char **old_regstart, **old_regend;
3432#endif
3433
3434  /* The is_active field of reg_info helps us keep track of which (possibly
3435     nested) subexpressions we are currently in. The matched_something
3436     field of reg_info[reg_num] helps us tell whether or not we have
3437     matched any of the pattern so far this time through the reg_num-th
3438     subexpression.  These two fields get reset each time through any
3439     loop their register is in.  */
3440#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3441  register_info_type *reg_info;
3442#endif
3443
3444  /* The following record the register info as found in the above
3445     variables when we find a match better than any we've seen before.
3446     This happens as we backtrack through the failure points, which in
3447     turn happens only if we have not yet matched the entire string. */
3448  unsigned best_regs_set = false;
3449#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3450  const char **best_regstart, **best_regend;
3451#endif
3452 
3453  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3454     allocate space for that if we're not allocating space for anything
3455     else (see below).  Also, we never need info about register 0 for
3456     any of the other register vectors, and it seems rather a kludge to
3457     treat `best_regend' differently than the rest.  So we keep track of
3458     the end of the best match so far in a separate variable.  We
3459     initialize this to NULL so that when we backtrack the first time
3460     and need to test it, it's not garbage.  */
3461  const char *match_end = NULL;
3462
3463  /* Used when we pop values we don't care about.  */
3464#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3465  const char **reg_dummy;
3466  register_info_type *reg_info_dummy;
3467#endif
3468
3469#ifdef DEBUG
3470  /* Counts the total number of registers pushed.  */
3471  unsigned num_regs_pushed = 0;         
3472#endif
3473
3474  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3475 
3476  INIT_FAIL_STACK ();
3477 
3478#ifdef MATCH_MAY_ALLOCATE
3479  /* Do not bother to initialize all the register variables if there are
3480     no groups in the pattern, as it takes a fair amount of time.  If
3481     there are groups, we include space for register 0 (the whole
3482     pattern), even though we never use it, since it simplifies the
3483     array indexing.  We should fix this.  */
3484  if (bufp->re_nsub)
3485    {
3486      regstart = REGEX_TALLOC (num_regs, const char *);
3487      regend = REGEX_TALLOC (num_regs, const char *);
3488      old_regstart = REGEX_TALLOC (num_regs, const char *);
3489      old_regend = REGEX_TALLOC (num_regs, const char *);
3490      best_regstart = REGEX_TALLOC (num_regs, const char *);
3491      best_regend = REGEX_TALLOC (num_regs, const char *);
3492      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3493      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3494      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3495
3496      if (!(regstart && regend && old_regstart && old_regend && reg_info
3497            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3498        {
3499          FREE_VARIABLES ();
3500          return -2;
3501        }
3502    }
3503#if defined (REGEX_MALLOC)
3504  else
3505    {
3506      /* We must initialize all our variables to NULL, so that
3507         `FREE_VARIABLES' doesn't try to free them.  */
3508      regstart = regend = old_regstart = old_regend = best_regstart
3509        = best_regend = reg_dummy = NULL;
3510      reg_info = reg_info_dummy = (register_info_type *) NULL;
3511    }
3512#endif /* REGEX_MALLOC */
3513#endif /* MATCH_MAY_ALLOCATE */
3514
3515  /* The starting position is bogus.  */
3516  if (pos < 0 || pos > size1 + size2)
3517    {
3518      FREE_VARIABLES ();
3519      return -1;
3520    }
3521   
3522  /* Initialize subexpression text positions to -1 to mark ones that no
3523     start_memory/stop_memory has been seen for. Also initialize the
3524     register information struct.  */
3525  for (mcnt = 1; mcnt < num_regs; mcnt++)
3526    {
3527      regstart[mcnt] = regend[mcnt]
3528        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3529       
3530      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3531      IS_ACTIVE (reg_info[mcnt]) = 0;
3532      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3533      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3534    }
3535 
3536  /* We move `string1' into `string2' if the latter's empty -- but not if
3537     `string1' is null.  */
3538  if (size2 == 0 && string1 != NULL)
3539    {
3540      string2 = string1;
3541      size2 = size1;
3542      string1 = 0;
3543      size1 = 0;
3544    }
3545  end1 = string1 + size1;
3546  end2 = string2 + size2;
3547
3548  /* Compute where to stop matching, within the two strings.  */
3549  if (stop <= size1)
3550    {
3551      end_match_1 = string1 + stop;
3552      end_match_2 = string2;
3553    }
3554  else
3555    {
3556      end_match_1 = end1;
3557      end_match_2 = string2 + stop - size1;
3558    }
3559
3560  /* `p' scans through the pattern as `d' scans through the data.
3561     `dend' is the end of the input string that `d' points within.  `d'
3562     is advanced into the following input string whenever necessary, but
3563     this happens before fetching; therefore, at the beginning of the
3564     loop, `d' can be pointing at the end of a string, but it cannot
3565     equal `string2'.  */
3566  if (size1 > 0 && pos <= size1)
3567    {
3568      d = string1 + pos;
3569      dend = end_match_1;
3570    }
3571  else
3572    {
3573      d = string2 + pos - size1;
3574      dend = end_match_2;
3575    }
3576
3577  DEBUG_PRINT1 ("The compiled pattern is: ");
3578  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3579  DEBUG_PRINT1 ("The string to match is: `");
3580  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3581  DEBUG_PRINT1 ("'\n");
3582 
3583  /* This loops over pattern commands.  It exits by returning from the
3584     function if the match is complete, or it drops through if the match
3585     fails at this starting point in the input data.  */
3586  for (;;)
3587    {
3588      DEBUG_PRINT2 ("\n0x%x: ", p);
3589
3590      if (p == pend)
3591        { /* End of pattern means we might have succeeded.  */
3592          DEBUG_PRINT1 ("end of pattern ... ");
3593         
3594          /* If we haven't matched the entire string, and we want the
3595             longest match, try backtracking.  */
3596          if (d != end_match_2)
3597            {
3598              /* 1 if this match ends in the same string (string1 or string2)
3599                 as the best previous match.  */
3600              boolean same_str_p = (FIRST_STRING_P (match_end)
3601                                    == MATCHING_IN_FIRST_STRING);
3602              /* 1 if this match is the best seen so far.  */
3603              boolean best_match_p;
3604
3605              /* AIX compiler got confused when this was combined
3606                 with the previous declaration.  */
3607              if (same_str_p)
3608                best_match_p = d > match_end;
3609              else
3610                best_match_p = !MATCHING_IN_FIRST_STRING;
3611
3612              DEBUG_PRINT1 ("backtracking.\n");
3613             
3614              if (!FAIL_STACK_EMPTY ())
3615                { /* More failure points to try.  */
3616
3617                  /* If exceeds best match so far, save it.  */
3618                  if (!best_regs_set || best_match_p)
3619                    {
3620                      best_regs_set = true;
3621                      match_end = d;
3622                     
3623                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3624                     
3625                      for (mcnt = 1; mcnt < num_regs; mcnt++)
3626                        {
3627                          best_regstart[mcnt] = regstart[mcnt];
3628                          best_regend[mcnt] = regend[mcnt];
3629                        }
3630                    }
3631                  goto fail;           
3632                }
3633
3634              /* If no failure points, don't restore garbage.  And if
3635                 last match is real best match, don't restore second
3636                 best one. */
3637              else if (best_regs_set && !best_match_p)
3638                {
3639                restore_best_regs:
3640                  /* Restore best match.  It may happen that `dend ==
3641                     end_match_1' while the restored d is in string2.
3642                     For example, the pattern `x.*y.*z' against the
3643                     strings `x-' and `y-z-', if the two strings are
3644                     not consecutive in memory.  */
3645                  DEBUG_PRINT1 ("Restoring best registers.\n");
3646                 
3647                  d = match_end;
3648                  dend = ((d >= string1 && d <= end1)
3649                           ? end_match_1 : end_match_2);
3650
3651                  for (mcnt = 1; mcnt < num_regs; mcnt++)
3652                    {
3653                      regstart[mcnt] = best_regstart[mcnt];
3654                      regend[mcnt] = best_regend[mcnt];
3655                    }
3656                }
3657            } /* d != end_match_2 */
3658
3659          DEBUG_PRINT1 ("Accepting match.\n");
3660
3661          /* If caller wants register contents data back, do it.  */
3662          if (regs && !bufp->no_sub)
3663            {
3664              /* Have the register data arrays been allocated?  */
3665              if (bufp->regs_allocated == REGS_UNALLOCATED)
3666                { /* No.  So allocate them with malloc.  We need one
3667                     extra element beyond `num_regs' for the `-1' marker
3668                     GNU code uses.  */
3669                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3670                  regs->start = TALLOC (regs->num_regs, regoff_t);
3671                  regs->end = TALLOC (regs->num_regs, regoff_t);
3672                  if (regs->start == NULL || regs->end == NULL)
3673                    return -2;
3674                  bufp->regs_allocated = REGS_REALLOCATE;
3675                }
3676              else if (bufp->regs_allocated == REGS_REALLOCATE)
3677                { /* Yes.  If we need more elements than were already
3678                     allocated, reallocate them.  If we need fewer, just
3679                     leave it alone.  */
3680                  if (regs->num_regs < num_regs + 1)
3681                    {
3682                      regs->num_regs = num_regs + 1;
3683                      RETALLOC (regs->start, regs->num_regs, regoff_t);
3684                      RETALLOC (regs->end, regs->num_regs, regoff_t);
3685                      if (regs->start == NULL || regs->end == NULL)
3686                        return -2;
3687                    }
3688                }
3689              else
3690                {
3691                  /* These braces fend off a "empty body in an else-statement"
3692                     warning under GCC when assert expands to nothing.  */
3693                  assert (bufp->regs_allocated == REGS_FIXED);
3694                }
3695
3696              /* Convert the pointer data in `regstart' and `regend' to
3697                 indices.  Register zero has to be set differently,
3698                 since we haven't kept track of any info for it.  */
3699              if (regs->num_regs > 0)
3700                {
3701                  regs->start[0] = pos;
3702                  regs->end[0] = (MATCHING_IN_FIRST_STRING
3703                                  ? ((regoff_t) (d - string1))
3704                                  : ((regoff_t) (d - string2 + size1)));
3705                }
3706             
3707              /* Go through the first `min (num_regs, regs->num_regs)'
3708                 registers, since that is all we initialized.  */
3709              for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3710                {
3711                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3712                    regs->start[mcnt] = regs->end[mcnt] = -1;
3713                  else
3714                    {
3715                      regs->start[mcnt]
3716                        = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
3717                      regs->end[mcnt]
3718                        = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
3719                    }
3720                }
3721             
3722              /* If the regs structure we return has more elements than
3723                 were in the pattern, set the extra elements to -1.  If
3724                 we (re)allocated the registers, this is the case,
3725                 because we always allocate enough to have at least one
3726                 -1 at the end.  */
3727              for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3728                regs->start[mcnt] = regs->end[mcnt] = -1;
3729            } /* regs && !bufp->no_sub */
3730
3731          FREE_VARIABLES ();
3732          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3733                        nfailure_points_pushed, nfailure_points_popped,
3734                        nfailure_points_pushed - nfailure_points_popped);
3735          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3736
3737          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3738                            ? string1
3739                            : string2 - size1);
3740
3741          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3742
3743          return mcnt;
3744        }
3745
3746      /* Otherwise match next pattern command.  */
3747#ifdef SWITCH_ENUM_BUG
3748      switch ((int) ((re_opcode_t) *p++))
3749#else
3750      switch ((re_opcode_t) *p++)
3751#endif
3752        {
3753        /* Ignore these.  Used to ignore the n of succeed_n's which
3754           currently have n == 0.  */
3755        case no_op:
3756          DEBUG_PRINT1 ("EXECUTING no_op.\n");
3757          break;
3758
3759
3760        /* Match the next n pattern characters exactly.  The following
3761           byte in the pattern defines n, and the n bytes after that
3762           are the characters to match.  */
3763        case exactn:
3764          mcnt = *p++;
3765          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3766
3767          /* This is written out as an if-else so we don't waste time
3768             testing `translate' inside the loop.  */
3769          if (translate)
3770            {
3771              do
3772                {
3773                  PREFETCH ();
3774                  if (translate[(unsigned char) *d++] != (char) *p++)
3775                    goto fail;
3776                }
3777              while (--mcnt);
3778            }
3779          else
3780            {
3781              do
3782                {
3783                  PREFETCH ();
3784                  if (*d++ != (char) *p++) goto fail;
3785                }
3786              while (--mcnt);
3787            }
3788          SET_REGS_MATCHED ();
3789          break;
3790
3791
3792        /* Match any character except possibly a newline or a null.  */
3793        case anychar:
3794          DEBUG_PRINT1 ("EXECUTING anychar.\n");
3795
3796          PREFETCH ();
3797
3798          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3799              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3800            goto fail;
3801
3802          SET_REGS_MATCHED ();
3803          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3804          d++;
3805          break;
3806
3807
3808        case charset:
3809        case charset_not:
3810          {
3811            register unsigned char c;
3812            boolean not = (re_opcode_t) *(p - 1) == charset_not;
3813
3814            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3815
3816            PREFETCH ();
3817            c = TRANSLATE (*d); /* The character to match.  */
3818
3819            /* Cast to `unsigned' instead of `unsigned char' in case the
3820               bit list is a full 32 bytes long.  */
3821            if (c < (unsigned) (*p * BYTEWIDTH)
3822                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3823              not = !not;
3824
3825            p += 1 + *p;
3826
3827            if (!not) goto fail;
3828           
3829            SET_REGS_MATCHED ();
3830            d++;
3831            break;
3832          }
3833
3834
3835        /* The beginning of a group is represented by start_memory.
3836           The arguments are the register number in the next byte, and the
3837           number of groups inner to this one in the next.  The text
3838           matched within the group is recorded (in the internal
3839           registers data structure) under the register number.  */
3840        case start_memory:
3841          DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3842
3843          /* Find out if this group can match the empty string.  */
3844          p1 = p;               /* To send to group_match_null_string_p.  */
3845         
3846          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3847            REG_MATCH_NULL_STRING_P (reg_info[*p])
3848              = group_match_null_string_p (&p1, pend, reg_info);
3849
3850          /* Save the position in the string where we were the last time
3851             we were at this open-group operator in case the group is
3852             operated upon by a repetition operator, e.g., with `(a*)*b'
3853             against `ab'; then we want to ignore where we are now in
3854             the string in case this attempt to match fails.  */
3855          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3856                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3857                             : regstart[*p];
3858          DEBUG_PRINT2 ("  old_regstart: %d\n",
3859                         POINTER_TO_OFFSET (old_regstart[*p]));
3860
3861          regstart[*p] = d;
3862          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3863
3864          IS_ACTIVE (reg_info[*p]) = 1;
3865          MATCHED_SOMETHING (reg_info[*p]) = 0;
3866         
3867          /* This is the new highest active register.  */
3868          highest_active_reg = *p;
3869         
3870          /* If nothing was active before, this is the new lowest active
3871             register.  */
3872          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3873            lowest_active_reg = *p;
3874
3875          /* Move past the register number and inner group count.  */
3876          p += 2;
3877          just_past_start_mem = p;
3878          break;
3879
3880
3881        /* The stop_memory opcode represents the end of a group.  Its
3882           arguments are the same as start_memory's: the register
3883           number, and the number of inner groups.  */
3884        case stop_memory:
3885          DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3886             
3887          /* We need to save the string position the last time we were at
3888             this close-group operator in case the group is operated
3889             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3890             against `aba'; then we want to ignore where we are now in
3891             the string in case this attempt to match fails.  */
3892          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3893                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
3894                           : regend[*p];
3895          DEBUG_PRINT2 ("      old_regend: %d\n",
3896                         POINTER_TO_OFFSET (old_regend[*p]));
3897
3898          regend[*p] = d;
3899          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3900
3901          /* This register isn't active anymore.  */
3902          IS_ACTIVE (reg_info[*p]) = 0;
3903         
3904          /* If this was the only register active, nothing is active
3905             anymore.  */
3906          if (lowest_active_reg == highest_active_reg)
3907            {
3908              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3909              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3910            }
3911          else
3912            { /* We must scan for the new highest active register, since
3913                 it isn't necessarily one less than now: consider
3914                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3915                 new highest active register is 1.  */
3916              unsigned char r = *p - 1;
3917              while (r > 0 && !IS_ACTIVE (reg_info[r]))
3918                r--;
3919             
3920              /* If we end up at register zero, that means that we saved
3921                 the registers as the result of an `on_failure_jump', not
3922                 a `start_memory', and we jumped to past the innermost
3923                 `stop_memory'.  For example, in ((.)*) we save
3924                 registers 1 and 2 as a result of the *, but when we pop
3925                 back to the second ), we are at the stop_memory 1.
3926                 Thus, nothing is active.  */
3927              if (r == 0)
3928                {
3929                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3930                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3931                }
3932              else
3933                highest_active_reg = r;
3934            }
3935         
3936          /* If just failed to match something this time around with a
3937             group that's operated on by a repetition operator, try to
3938             force exit from the ``loop'', and restore the register
3939             information for this group that we had before trying this
3940             last match.  */
3941          if ((!MATCHED_SOMETHING (reg_info[*p])
3942               || just_past_start_mem == p - 1)
3943              && (p + 2) < pend)             
3944            {
3945              boolean is_a_jump_n = false;
3946             
3947              p1 = p + 2;
3948              mcnt = 0;
3949              switch ((re_opcode_t) *p1++)
3950                {
3951                  case jump_n:
3952                    is_a_jump_n = true;
3953                  case pop_failure_jump:
3954                  case maybe_pop_jump:
3955                  case jump:
3956                  case dummy_failure_jump:
3957                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3958                    if (is_a_jump_n)
3959                      p1 += 2;
3960                    break;
3961                 
3962                  default:
3963                    /* do nothing */ ;
3964                }
3965              p1 += mcnt;
3966       
3967              /* If the next operation is a jump backwards in the pattern
3968                 to an on_failure_jump right before the start_memory
3969                 corresponding to this stop_memory, exit from the loop
3970                 by forcing a failure after pushing on the stack the
3971                 on_failure_jump's jump in the pattern, and d.  */
3972              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3973                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3974                {
3975                  /* If this group ever matched anything, then restore
3976                     what its registers were before trying this last
3977                     failed match, e.g., with `(a*)*b' against `ab' for
3978                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
3979                     against `aba' for regend[3].
3980                     
3981                     Also restore the registers for inner groups for,
3982                     e.g., `((a*)(b*))*' against `aba' (register 3 would
3983                     otherwise get trashed).  */
3984                     
3985                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3986                    {
3987                      unsigned r;
3988       
3989                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3990                     
3991                      /* Restore this and inner groups' (if any) registers.  */
3992                      for (r = *p; r < *p + *(p + 1); r++)
3993                        {
3994                          regstart[r] = old_regstart[r];
3995
3996                          /* xx why this test?  */
3997                          if ((int) old_regend[r] >= (int) regstart[r])
3998                            regend[r] = old_regend[r];
3999                        }     
4000                    }
4001                  p1++;
4002                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4003                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4004
4005                  goto fail;
4006                }
4007            }
4008         
4009          /* Move past the register number and the inner group count.  */
4010          p += 2;
4011          break;
4012
4013
4014        /* \<digit> has been turned into a `duplicate' command which is
4015           followed by the numeric value of <digit> as the register number.  */
4016        case duplicate:
4017          {
4018            register const char *d2, *dend2;
4019            int regno = *p++;   /* Get which register to match against.  */
4020            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4021
4022            /* Can't back reference a group which we've never matched.  */
4023            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4024              goto fail;
4025             
4026            /* Where in input to try to start matching.  */
4027            d2 = regstart[regno];
4028           
4029            /* Where to stop matching; if both the place to start and
4030               the place to stop matching are in the same string, then
4031               set to the place to stop, otherwise, for now have to use
4032               the end of the first string.  */
4033
4034            dend2 = ((FIRST_STRING_P (regstart[regno])
4035                      == FIRST_STRING_P (regend[regno]))
4036                     ? regend[regno] : end_match_1);
4037            for (;;)
4038              {
4039                /* If necessary, advance to next segment in register
4040                   contents.  */
4041                while (d2 == dend2)
4042                  {
4043                    if (dend2 == end_match_2) break;
4044                    if (dend2 == regend[regno]) break;
4045
4046                    /* End of string1 => advance to string2. */
4047                    d2 = string2;
4048                    dend2 = regend[regno];
4049                  }
4050                /* At end of register contents => success */
4051                if (d2 == dend2) break;
4052
4053                /* If necessary, advance to next segment in data.  */
4054                PREFETCH ();
4055
4056                /* How many characters left in this segment to match.  */
4057                mcnt = dend - d;
4058               
4059                /* Want how many consecutive characters we can match in
4060                   one shot, so, if necessary, adjust the count.  */
4061                if (mcnt > dend2 - d2)
4062                  mcnt = dend2 - d2;
4063                 
4064                /* Compare that many; failure if mismatch, else move
4065                   past them.  */
4066                if (translate
4067                    ? bcmp_translate (d, d2, mcnt, translate)
4068                    : bcmp (d, d2, mcnt))
4069                  goto fail;
4070                d += mcnt, d2 += mcnt;
4071              }
4072          }
4073          break;
4074
4075
4076        /* begline matches the empty string at the beginning of the string
4077           (unless `not_bol' is set in `bufp'), and, if
4078           `newline_anchor' is set, after newlines.  */
4079        case begline:
4080          DEBUG_PRINT1 ("EXECUTING begline.\n");
4081         
4082          if (AT_STRINGS_BEG (d))
4083            {
4084              if (!bufp->not_bol) break;
4085            }
4086          else if (d[-1] == '\n' && bufp->newline_anchor)
4087            {
4088              break;
4089            }
4090          /* In all other cases, we fail.  */
4091          goto fail;
4092
4093
4094        /* endline is the dual of begline.  */
4095        case endline:
4096          DEBUG_PRINT1 ("EXECUTING endline.\n");
4097
4098          if (AT_STRINGS_END (d))
4099            {
4100              if (!bufp->not_eol) break;
4101            }
4102         
4103          /* We have to ``prefetch'' the next character.  */
4104          else if ((d == end1 ? *string2 : *d) == '\n'
4105                   && bufp->newline_anchor)
4106            {
4107              break;
4108            }
4109          goto fail;
4110
4111
4112        /* Match at the very beginning of the data.  */
4113        case begbuf:
4114          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4115          if (AT_STRINGS_BEG (d))
4116            break;
4117          goto fail;
4118
4119
4120        /* Match at the very end of the data.  */
4121        case endbuf:
4122          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4123          if (AT_STRINGS_END (d))
4124            break;
4125          goto fail;
4126
4127
4128        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4129           pushes NULL as the value for the string on the stack.  Then
4130           `pop_failure_point' will keep the current value for the
4131           string, instead of restoring it.  To see why, consider
4132           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4133           then the . fails against the \n.  But the next thing we want
4134           to do is match the \n against the \n; if we restored the
4135           string value, we would be back at the foo.
4136           
4137           Because this is used only in specific cases, we don't need to
4138           check all the things that `on_failure_jump' does, to make
4139           sure the right things get saved on the stack.  Hence we don't
4140           share its code.  The only reason to push anything on the
4141           stack at all is that otherwise we would have to change
4142           `anychar's code to do something besides goto fail in this
4143           case; that seems worse than this.  */
4144        case on_failure_keep_string_jump:
4145          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4146         
4147          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4148          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4149
4150          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4151          break;
4152
4153
4154        /* Uses of on_failure_jump:
4155       
4156           Each alternative starts with an on_failure_jump that points
4157           to the beginning of the next alternative.  Each alternative
4158           except the last ends with a jump that in effect jumps past
4159           the rest of the alternatives.  (They really jump to the
4160           ending jump of the following alternative, because tensioning
4161           these jumps is a hassle.)
4162
4163           Repeats start with an on_failure_jump that points past both
4164           the repetition text and either the following jump or
4165           pop_failure_jump back to this on_failure_jump.  */
4166        case on_failure_jump:
4167        on_failure:
4168          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4169
4170          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4171          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4172
4173          /* If this on_failure_jump comes right before a group (i.e.,
4174             the original * applied to a group), save the information
4175             for that group and all inner ones, so that if we fail back
4176             to this point, the group's information will be correct.
4177             For example, in \(a*\)*\1, we need the preceding group,
4178             and in \(\(a*\)b*\)\2, we need the inner group.  */
4179
4180          /* We can't use `p' to check ahead because we push
4181             a failure point to `p + mcnt' after we do this.  */
4182          p1 = p;
4183
4184          /* We need to skip no_op's before we look for the
4185             start_memory in case this on_failure_jump is happening as
4186             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4187             against aba.  */
4188          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4189            p1++;
4190
4191          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4192            {
4193              /* We have a new highest active register now.  This will
4194                 get reset at the start_memory we are about to get to,
4195                 but we will have saved all the registers relevant to
4196                 this repetition op, as described above.  */
4197              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4198              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4199                lowest_active_reg = *(p1 + 1);
4200            }
4201
4202          DEBUG_PRINT1 (":\n");
4203          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4204          break;
4205
4206
4207        /* A smart repeat ends with `maybe_pop_jump'.
4208           We change it to either `pop_failure_jump' or `jump'.  */
4209        case maybe_pop_jump:
4210          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4211          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4212          {
4213            register unsigned char *p2 = p;
4214
4215            /* Compare the beginning of the repeat with what in the
4216               pattern follows its end. If we can establish that there
4217               is nothing that they would both match, i.e., that we
4218               would have to backtrack because of (as in, e.g., `a*a')
4219               then we can change to pop_failure_jump, because we'll
4220               never have to backtrack.
4221               
4222               This is not true in the case of alternatives: in
4223               `(a|ab)*' we do need to backtrack to the `ab' alternative
4224               (e.g., if the string was `ab').  But instead of trying to
4225               detect that here, the alternative has put on a dummy
4226               failure point which is what we will end up popping.  */
4227
4228            /* Skip over open/close-group commands.
4229               If what follows this loop is a ...+ construct,
4230               look at what begins its body, since we will have to
4231               match at least one of that.  */
4232            while (1)
4233              {
4234                if (p2 + 2 < pend
4235                    && ((re_opcode_t) *p2 == stop_memory
4236                        || (re_opcode_t) *p2 == start_memory))
4237                  p2 += 3;
4238                else if (p2 + 6 < pend
4239                         && (re_opcode_t) *p2 == dummy_failure_jump)
4240                  p2 += 6;
4241                else
4242                  break;
4243              }
4244
4245            p1 = p + mcnt;
4246            /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4247               to the `maybe_finalize_jump' of this case.  Examine what
4248               follows.  */
4249
4250            /* If we're at the end of the pattern, we can change.  */
4251            if (p2 == pend)
4252              {
4253                /* Consider what happens when matching ":\(.*\)"
4254                   against ":/".  I don't really understand this code
4255                   yet.  */
4256                p[-3] = (unsigned char) pop_failure_jump;
4257                DEBUG_PRINT1
4258                  ("  End of pattern: change to `pop_failure_jump'.\n");
4259              }
4260
4261            else if ((re_opcode_t) *p2 == exactn
4262                     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4263              {
4264                register unsigned char c
4265                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4266
4267                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4268                  {
4269                    p[-3] = (unsigned char) pop_failure_jump;
4270                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4271                                  c, p1[5]);
4272                  }
4273                 
4274                else if ((re_opcode_t) p1[3] == charset
4275                         || (re_opcode_t) p1[3] == charset_not)
4276                  {
4277                    int not = (re_opcode_t) p1[3] == charset_not;
4278                   
4279                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4280                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4281                      not = !not;
4282
4283                    /* `not' is equal to 1 if c would match, which means
4284                        that we can't change to pop_failure_jump.  */
4285                    if (!not)
4286                      {
4287                        p[-3] = (unsigned char) pop_failure_jump;
4288                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4289                      }
4290                  }
4291              }
4292            else if ((re_opcode_t) *p2 == charset)
4293              {
4294#ifdef DEBUG
4295                register unsigned char c
4296                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4297#endif
4298
4299                if ((re_opcode_t) p1[3] == exactn
4300                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4301                          && (p2[1 + p1[4] / BYTEWIDTH]
4302                              & (1 << (p1[4] % BYTEWIDTH)))))
4303                  {
4304                    p[-3] = (unsigned char) pop_failure_jump;
4305                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4306                                  c, p1[5]);
4307                  }
4308                 
4309                else if ((re_opcode_t) p1[3] == charset_not)
4310                  {
4311                    int idx;
4312                    /* We win if the charset_not inside the loop
4313                       lists every character listed in the charset after.  */
4314                    for (idx = 0; idx < (int) p2[1]; idx++)
4315                      if (! (p2[2 + idx] == 0
4316                             || (idx < (int) p1[4]
4317                                 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4318                        break;
4319
4320                    if (idx == p2[1])
4321                      {
4322                        p[-3] = (unsigned char) pop_failure_jump;
4323                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4324                      }
4325                  }
4326                else if ((re_opcode_t) p1[3] == charset)
4327                  {
4328                    int idx;
4329                    /* We win if the charset inside the loop
4330                       has no overlap with the one after the loop.  */
4331                    for (idx = 0;
4332                         idx < (int) p2[1] && idx < (int) p1[4];
4333                         idx++)
4334                      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4335                        break;
4336
4337                    if (idx == p2[1] || idx == p1[4])
4338                      {
4339                        p[-3] = (unsigned char) pop_failure_jump;
4340                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4341                      }
4342                  }
4343              }
4344          }
4345          p -= 2;               /* Point at relative address again.  */
4346          if ((re_opcode_t) p[-1] != pop_failure_jump)
4347            {
4348              p[-1] = (unsigned char) jump;
4349              DEBUG_PRINT1 ("  Match => jump.\n");
4350              goto unconditional_jump;
4351            }
4352        /* Note fall through.  */
4353
4354
4355        /* The end of a simple repeat has a pop_failure_jump back to
4356           its matching on_failure_jump, where the latter will push a
4357           failure point.  The pop_failure_jump takes off failure
4358           points put on by this pop_failure_jump's matching
4359           on_failure_jump; we got through the pattern to here from the
4360           matching on_failure_jump, so didn't fail.  */
4361        case pop_failure_jump:
4362          {
4363            /* We need to pass separate storage for the lowest and
4364               highest registers, even though we don't care about the
4365               actual values.  Otherwise, we will restore only one
4366               register from the stack, since lowest will == highest in
4367               `pop_failure_point'.  */
4368            unsigned dummy_low_reg, dummy_high_reg;
4369            unsigned char *pdummy;
4370            const char *sdummy;
4371
4372            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4373            POP_FAILURE_POINT (sdummy, pdummy,
4374                               dummy_low_reg, dummy_high_reg,
4375                               reg_dummy, reg_dummy, reg_info_dummy);
4376          }
4377          /* Note fall through.  */
4378
4379         
4380        /* Unconditionally jump (without popping any failure points).  */
4381        case jump:
4382        unconditional_jump:
4383          EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4384          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4385          p += mcnt;                            /* Do the jump.  */
4386          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4387          break;
4388
4389       
4390        /* We need this opcode so we can detect where alternatives end
4391           in `group_match_null_string_p' et al.  */
4392        case jump_past_alt:
4393          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4394          goto unconditional_jump;
4395
4396
4397        /* Normally, the on_failure_jump pushes a failure point, which
4398           then gets popped at pop_failure_jump.  We will end up at
4399           pop_failure_jump, also, and with a pattern of, say, `a+', we
4400           are skipping over the on_failure_jump, so we have to push
4401           something meaningless for pop_failure_jump to pop.  */
4402        case dummy_failure_jump:
4403          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4404          /* It doesn't matter what we push for the string here.  What
4405             the code at `fail' tests is the value for the pattern.  */
4406          PUSH_FAILURE_POINT (0, 0, -2);
4407          goto unconditional_jump;
4408
4409
4410        /* At the end of an alternative, we need to push a dummy failure
4411           point in case we are followed by a `pop_failure_jump', because
4412           we don't want the failure point for the alternative to be
4413           popped.  For example, matching `(a|ab)*' against `aab'
4414           requires that we match the `ab' alternative.  */
4415        case push_dummy_failure:
4416          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4417          /* See comments just above at `dummy_failure_jump' about the
4418             two zeroes.  */
4419          PUSH_FAILURE_POINT (0, 0, -2);
4420          break;
4421
4422        /* Have to succeed matching what follows at least n times.
4423           After that, handle like `on_failure_jump'.  */
4424        case succeed_n:
4425          EXTRACT_NUMBER (mcnt, p + 2);
4426          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4427
4428          assert (mcnt >= 0);
4429          /* Originally, this is how many times we HAVE to succeed.  */
4430          if (mcnt > 0)
4431            {
4432               mcnt--;
4433               p += 2;
4434               STORE_NUMBER_AND_INCR (p, mcnt);
4435               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4436            }
4437          else if (mcnt == 0)
4438            {
4439              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4440              p[2] = (unsigned char) no_op;
4441              p[3] = (unsigned char) no_op;
4442              goto on_failure;
4443            }
4444          break;
4445       
4446        case jump_n:
4447          EXTRACT_NUMBER (mcnt, p + 2);
4448          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4449
4450          /* Originally, this is how many times we CAN jump.  */
4451          if (mcnt)
4452            {
4453               mcnt--;
4454               STORE_NUMBER (p + 2, mcnt);
4455               goto unconditional_jump;     
4456            }
4457          /* If don't have to jump any more, skip over the rest of command.  */
4458          else     
4459            p += 4;                 
4460          break;
4461       
4462        case set_number_at:
4463          {
4464            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4465
4466            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4467            p1 = p + mcnt;
4468            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4469            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4470            STORE_NUMBER (p1, mcnt);
4471            break;
4472          }
4473
4474        case wordbound:
4475          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4476          if (AT_WORD_BOUNDARY (d))
4477            break;
4478          goto fail;
4479
4480        case notwordbound:
4481          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4482          if (AT_WORD_BOUNDARY (d))
4483            goto fail;
4484          break;
4485
4486        case wordbeg:
4487          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4488          if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4489            break;
4490          goto fail;
4491
4492        case wordend:
4493          DEBUG_PRINT1 ("EXECUTING wordend.\n");
4494          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4495              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4496            break;
4497          goto fail;
4498
4499#ifdef emacs
4500        case before_dot:
4501          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4502          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4503            goto fail;
4504          break;
4505 
4506        case at_dot:
4507          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4508          if (PTR_CHAR_POS ((unsigned char *) d) != point)
4509            goto fail;
4510          break;
4511 
4512        case after_dot:
4513          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4514          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4515            goto fail;
4516          break;
4517#if 0 /* not emacs19 */
4518        case at_dot:
4519          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4520          if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4521            goto fail;
4522          break;
4523#endif /* not emacs19 */
4524
4525        case syntaxspec:
4526          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4527          mcnt = *p++;
4528          goto matchsyntax;
4529
4530        case wordchar:
4531          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4532          mcnt = (int) Sword;
4533        matchsyntax:
4534          PREFETCH ();
4535          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4536          d++;
4537          if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
4538            goto fail;
4539          SET_REGS_MATCHED ();
4540          break;
4541
4542        case notsyntaxspec:
4543          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4544          mcnt = *p++;
4545          goto matchnotsyntax;
4546
4547        case notwordchar:
4548          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4549          mcnt = (int) Sword;
4550        matchnotsyntax:
4551          PREFETCH ();
4552          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4553          d++;
4554          if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
4555            goto fail;
4556          SET_REGS_MATCHED ();
4557          break;
4558
4559#else /* not emacs */
4560        case wordchar:
4561          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4562          PREFETCH ();
4563          if (!WORDCHAR_P (d))
4564            goto fail;
4565          SET_REGS_MATCHED ();
4566          d++;
4567          break;
4568         
4569        case notwordchar:
4570          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4571          PREFETCH ();
4572          if (WORDCHAR_P (d))
4573            goto fail;
4574          SET_REGS_MATCHED ();
4575          d++;
4576          break;
4577#endif /* not emacs */
4578         
4579        default:
4580          abort ();
4581        }
4582      continue;  /* Successfully executed one pattern command; keep going.  */
4583
4584
4585    /* We goto here if a matching operation fails. */
4586    fail:
4587      if (!FAIL_STACK_EMPTY ())
4588        { /* A restart point is known.  Restore to that state.  */
4589          DEBUG_PRINT1 ("\nFAIL:\n");
4590          POP_FAILURE_POINT (d, p,
4591                             lowest_active_reg, highest_active_reg,
4592                             regstart, regend, reg_info);
4593
4594          /* If this failure point is a dummy, try the next one.  */
4595          if (!p)
4596            goto fail;
4597
4598          /* If we failed to the end of the pattern, don't examine *p.  */
4599          assert (p <= pend);
4600          if (p < pend)
4601            {
4602              boolean is_a_jump_n = false;
4603             
4604              /* If failed to a backwards jump that's part of a repetition
4605                 loop, need to pop this failure point and use the next one.  */
4606              switch ((re_opcode_t) *p)
4607                {
4608                case jump_n:
4609                  is_a_jump_n = true;
4610                case maybe_pop_jump:
4611                case pop_failure_jump:
4612                case jump:
4613                  p1 = p + 1;
4614                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4615                  p1 += mcnt;   
4616
4617                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4618                      || (!is_a_jump_n
4619                          && (re_opcode_t) *p1 == on_failure_jump))
4620                    goto fail;
4621                  break;
4622                default:
4623                  /* do nothing */ ;
4624                }
4625            }
4626
4627          if (d >= string1 && d <= end1)
4628            dend = end_match_1;
4629        }
4630      else
4631        break;   /* Matching at this starting point really fails.  */
4632    } /* for (;;) */
4633
4634  if (best_regs_set)
4635    goto restore_best_regs;
4636
4637  FREE_VARIABLES ();
4638
4639  return -1;                            /* Failure to match.  */
4640} /* re_match_2 */
4641
4642/* Subroutine definitions for re_match_2.  */
4643
4644
4645/* We are passed P pointing to a register number after a start_memory.
4646   
4647   Return true if the pattern up to the corresponding stop_memory can
4648   match the empty string, and false otherwise.
4649   
4650   If we find the matching stop_memory, sets P to point to one past its number.
4651   Otherwise, sets P to an undefined byte less than or equal to END.
4652
4653   We don't handle duplicates properly (yet).  */
4654
4655static boolean
4656group_match_null_string_p (p, end, reg_info)
4657    unsigned char **p, *end;
4658    register_info_type *reg_info;
4659{
4660  int mcnt;
4661  /* Point to after the args to the start_memory.  */
4662  unsigned char *p1 = *p + 2;
4663 
4664  while (p1 < end)
4665    {
4666      /* Skip over opcodes that can match nothing, and return true or
4667         false, as appropriate, when we get to one that can't, or to the
4668         matching stop_memory.  */
4669     
4670      switch ((re_opcode_t) *p1)
4671        {
4672        /* Could be either a loop or a series of alternatives.  */
4673        case on_failure_jump:
4674          p1++;
4675          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4676         
4677          /* If the next operation is not a jump backwards in the
4678             pattern.  */
4679
4680          if (mcnt >= 0)
4681            {
4682              /* Go through the on_failure_jumps of the alternatives,
4683                 seeing if any of the alternatives cannot match nothing.
4684                 The last alternative starts with only a jump,
4685                 whereas the rest start with on_failure_jump and end
4686                 with a jump, e.g., here is the pattern for `a|b|c':
4687
4688                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4689                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4690                 /exactn/1/c                                           
4691
4692                 So, we have to first go through the first (n-1)
4693                 alternatives and then deal with the last one separately.  */
4694
4695
4696              /* Deal with the first (n-1) alternatives, which start
4697                 with an on_failure_jump (see above) that jumps to right
4698                 past a jump_past_alt.  */
4699
4700              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4701                {
4702                  /* `mcnt' holds how many bytes long the alternative
4703                     is, including the ending `jump_past_alt' and
4704                     its number.  */
4705
4706                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4707                                                      reg_info))
4708                    return false;
4709
4710                  /* Move to right after this alternative, including the
4711                     jump_past_alt.  */
4712                  p1 += mcnt;   
4713
4714                  /* Break if it's the beginning of an n-th alternative
4715                     that doesn't begin with an on_failure_jump.  */
4716                  if ((re_opcode_t) *p1 != on_failure_jump)
4717                    break;
4718               
4719                  /* Still have to check that it's not an n-th
4720                     alternative that starts with an on_failure_jump.  */
4721                  p1++;
4722                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4723                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4724                    {
4725                      /* Get to the beginning of the n-th alternative.  */
4726                      p1 -= 3;
4727                      break;
4728                    }
4729                }
4730
4731              /* Deal with the last alternative: go back and get number
4732                 of the `jump_past_alt' just before it.  `mcnt' contains
4733                 the length of the alternative.  */
4734              EXTRACT_NUMBER (mcnt, p1 - 2);
4735
4736              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4737                return false;
4738
4739              p1 += mcnt;       /* Get past the n-th alternative.  */
4740            } /* if mcnt > 0 */
4741          break;
4742
4743         
4744        case stop_memory:
4745          assert (p1[1] == **p);
4746          *p = p1 + 2;
4747          return true;
4748
4749       
4750        default:
4751          if (!common_op_match_null_string_p (&p1, end, reg_info))
4752            return false;
4753        }
4754    } /* while p1 < end */
4755
4756  return false;
4757} /* group_match_null_string_p */
4758
4759
4760/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4761   It expects P to be the first byte of a single alternative and END one
4762   byte past the last. The alternative can contain groups.  */
4763   
4764static boolean
4765alt_match_null_string_p (p, end, reg_info)
4766    unsigned char *p, *end;
4767    register_info_type *reg_info;
4768{
4769  int mcnt;
4770  unsigned char *p1 = p;
4771 
4772  while (p1 < end)
4773    {
4774      /* Skip over opcodes that can match nothing, and break when we get
4775         to one that can't.  */
4776     
4777      switch ((re_opcode_t) *p1)
4778        {
4779        /* It's a loop.  */
4780        case on_failure_jump:
4781          p1++;
4782          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4783          p1 += mcnt;
4784          break;
4785         
4786        default:
4787          if (!common_op_match_null_string_p (&p1, end, reg_info))
4788            return false;
4789        }
4790    }  /* while p1 < end */
4791
4792  return true;
4793} /* alt_match_null_string_p */
4794
4795
4796/* Deals with the ops common to group_match_null_string_p and
4797   alt_match_null_string_p. 
4798   
4799   Sets P to one after the op and its arguments, if any.  */
4800
4801static boolean
4802common_op_match_null_string_p (p, end, reg_info)
4803    unsigned char **p, *end;
4804    register_info_type *reg_info;
4805{
4806  int mcnt;
4807  boolean ret;
4808  int reg_no;
4809  unsigned char *p1 = *p;
4810
4811  switch ((re_opcode_t) *p1++)
4812    {
4813    case no_op:
4814    case begline:
4815    case endline:
4816    case begbuf:
4817    case endbuf:
4818    case wordbeg:
4819    case wordend:
4820    case wordbound:
4821    case notwordbound:
4822#ifdef emacs
4823    case before_dot:
4824    case at_dot:
4825    case after_dot:
4826#endif
4827      break;
4828
4829    case start_memory:
4830      reg_no = *p1;
4831      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4832      ret = group_match_null_string_p (&p1, end, reg_info);
4833     
4834      /* Have to set this here in case we're checking a group which
4835         contains a group and a back reference to it.  */
4836
4837      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4838        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4839
4840      if (!ret)
4841        return false;
4842      break;
4843         
4844    /* If this is an optimized succeed_n for zero times, make the jump.  */
4845    case jump:
4846      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4847      if (mcnt >= 0)
4848        p1 += mcnt;
4849      else
4850        return false;
4851      break;
4852
4853    case succeed_n:
4854      /* Get to the number of times to succeed.  */
4855      p1 += 2;         
4856      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4857
4858      if (mcnt == 0)
4859        {
4860          p1 -= 4;
4861          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4862          p1 += mcnt;
4863        }
4864      else
4865        return false;
4866      break;
4867
4868    case duplicate:
4869      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4870        return false;
4871      break;
4872
4873    case set_number_at:
4874      p1 += 4;
4875
4876    default:
4877      /* All other opcodes mean we cannot match the empty string.  */
4878      return false;
4879  }
4880
4881  *p = p1;
4882  return true;
4883} /* common_op_match_null_string_p */
4884
4885
4886/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4887   bytes; nonzero otherwise.  */
4888   
4889static int
4890bcmp_translate (s1, s2, len, translate)
4891     unsigned char *s1, *s2;
4892     register int len;
4893     char *translate;
4894{
4895  register unsigned char *p1 = s1, *p2 = s2;
4896  while (len)
4897    {
4898      if (translate[*p1++] != translate[*p2++]) return 1;
4899      len--;
4900    }
4901  return 0;
4902}
4903
4904/* Entry points for GNU code.  */
4905
4906/* re_compile_pattern is the GNU regular expression compiler: it
4907   compiles PATTERN (of length SIZE) and puts the result in BUFP.
4908   Returns 0 if the pattern was valid, otherwise an error string.
4909   
4910   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4911   are set in BUFP on entry.
4912   
4913   We call regex_compile to do the actual compilation.  */
4914
4915const char *
4916re_compile_pattern (pattern, length, bufp)
4917     const char *pattern;
4918     int length;
4919     struct re_pattern_buffer *bufp;
4920{
4921  reg_errcode_t ret;
4922 
4923  /* GNU code is written to assume at least RE_NREGS registers will be set
4924     (and at least one extra will be -1).  */
4925  bufp->regs_allocated = REGS_UNALLOCATED;
4926 
4927  /* And GNU code determines whether or not to get register information
4928     by passing null for the REGS argument to re_match, etc., not by
4929     setting no_sub.  */
4930  bufp->no_sub = 0;
4931 
4932  /* Match anchors at newline.  */
4933  bufp->newline_anchor = 1;
4934 
4935  ret = regex_compile (pattern, length, re_syntax_options, bufp);
4936
4937  return re_error_msg[(int) ret];
4938}     
4939
4940/* Entry points compatible with 4.2 BSD regex library.  We don't define
4941   them if this is an Emacs or POSIX compilation.  */
4942
4943#if !defined (emacs) && !defined (_POSIX_SOURCE)
4944
4945/* BSD has one and only one pattern buffer.  */
4946static struct re_pattern_buffer re_comp_buf;
4947
4948char *
4949re_comp (s)
4950    const char *s;
4951{
4952  reg_errcode_t ret;
4953 
4954  if (!s)
4955    {
4956      if (!re_comp_buf.buffer)
4957        return "No previous regular expression";
4958      return 0;
4959    }
4960
4961  if (!re_comp_buf.buffer)
4962    {
4963      re_comp_buf.buffer = (unsigned char *) malloc (200);
4964      if (re_comp_buf.buffer == NULL)
4965        return "Memory exhausted";
4966      re_comp_buf.allocated = 200;
4967
4968      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4969      if (re_comp_buf.fastmap == NULL)
4970        return "Memory exhausted";
4971    }
4972
4973  /* Since `re_exec' always passes NULL for the `regs' argument, we
4974     don't need to initialize the pattern buffer fields which affect it.  */
4975
4976  /* Match anchors at newlines.  */
4977  re_comp_buf.newline_anchor = 1;
4978
4979  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4980 
4981  /* Yes, we're discarding `const' here.  */
4982  return (char *) re_error_msg[(int) ret];
4983}
4984
4985
4986int
4987re_exec (s)
4988    const char *s;
4989{
4990  const int len = strlen (s);
4991  return
4992    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4993}
4994#endif /* not emacs and not _POSIX_SOURCE */
4995
4996/* POSIX.2 functions.  Don't define these for Emacs.  */
4997
4998#ifndef emacs
4999
5000/* regcomp takes a regular expression as a string and compiles it.
5001
5002   PREG is a regex_t *.  We do not expect any fields to be initialized,
5003   since POSIX says we shouldn't.  Thus, we set
5004
5005     `buffer' to the compiled pattern;
5006     `used' to the length of the compiled pattern;
5007     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5008       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5009       RE_SYNTAX_POSIX_BASIC;
5010     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5011     `fastmap' and `fastmap_accurate' to zero;
5012     `re_nsub' to the number of subexpressions in PATTERN.
5013
5014   PATTERN is the address of the pattern string.
5015
5016   CFLAGS is a series of bits which affect compilation.
5017
5018     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5019     use POSIX basic syntax.
5020
5021     If REG_NEWLINE is set, then . and [^...] don't match newline.
5022     Also, regexec will try a match beginning after every newline.
5023
5024     If REG_ICASE is set, then we considers upper- and lowercase
5025     versions of letters to be equivalent when matching.
5026
5027     If REG_NOSUB is set, then when PREG is passed to regexec, that
5028     routine will report only success or failure, and nothing about the
5029     registers.
5030
5031   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5032   the return codes and their meanings.)  */
5033
5034int
5035regcomp (preg, pattern, cflags)
5036    regex_t *preg;
5037    const char *pattern;
5038    int cflags;
5039{
5040  reg_errcode_t ret;
5041  unsigned syntax
5042    = (cflags & REG_EXTENDED) ?
5043      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5044
5045  /* regex_compile will allocate the space for the compiled pattern.  */
5046  preg->buffer = 0;
5047  preg->allocated = 0;
5048  preg->used = 0;
5049 
5050  /* Don't bother to use a fastmap when searching.  This simplifies the
5051     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5052     characters after newlines into the fastmap.  This way, we just try
5053     every character.  */
5054  preg->fastmap = 0;
5055 
5056  if (cflags & REG_ICASE)
5057    {
5058      unsigned i;
5059     
5060      preg->translate = (char *) malloc (CHAR_SET_SIZE);
5061      if (preg->translate == NULL)
5062        return (int) REG_ESPACE;
5063
5064      /* Map uppercase characters to corresponding lowercase ones.  */
5065      for (i = 0; i < CHAR_SET_SIZE; i++)
5066        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5067    }
5068  else
5069    preg->translate = NULL;
5070
5071  /* If REG_NEWLINE is set, newlines are treated differently.  */
5072  if (cflags & REG_NEWLINE)
5073    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5074      syntax &= ~RE_DOT_NEWLINE;
5075      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5076      /* It also changes the matching behavior.  */
5077      preg->newline_anchor = 1;
5078    }
5079  else
5080    preg->newline_anchor = 0;
5081
5082  preg->no_sub = !!(cflags & REG_NOSUB);
5083
5084  /* POSIX says a null character in the pattern terminates it, so we
5085     can use strlen here in compiling the pattern.  */
5086  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5087 
5088  /* POSIX doesn't distinguish between an unmatched open-group and an
5089     unmatched close-group: both are REG_EPAREN.  */
5090  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5091 
5092  return (int) ret;
5093}
5094
5095
5096/* regexec searches for a given pattern, specified by PREG, in the
5097   string STRING.
5098   
5099   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5100   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5101   least NMATCH elements, and we set them to the offsets of the
5102   corresponding matched substrings.
5103   
5104   EFLAGS specifies `execution flags' which affect matching: if
5105   REG_NOTBOL is set, then ^ does not match at the beginning of the
5106   string; if REG_NOTEOL is set, then $ does not match at the end.
5107   
5108   We return 0 if we find a match and REG_NOMATCH if not.  */
5109
5110int
5111regexec (preg, string, nmatch, pmatch, eflags)
5112    const regex_t *preg;
5113    const char *string;
5114    size_t nmatch;
5115    regmatch_t pmatch[];
5116    int eflags;
5117{
5118  int ret;
5119  struct re_registers regs;
5120  regex_t private_preg;
5121  int len = strlen (string);
5122  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5123
5124  private_preg = *preg;
5125 
5126  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5127  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5128 
5129  /* The user has told us exactly how many registers to return
5130     information about, via `nmatch'.  We have to pass that on to the
5131     matching routines.  */
5132  private_preg.regs_allocated = REGS_FIXED;
5133 
5134  if (want_reg_info)
5135    {
5136      regs.num_regs = nmatch;
5137      regs.start = TALLOC (nmatch, regoff_t);
5138      regs.end = TALLOC (nmatch, regoff_t);
5139      if (regs.start == NULL || regs.end == NULL)
5140        return (int) REG_NOMATCH;
5141    }
5142
5143  /* Perform the searching operation.  */
5144  ret = re_search (&private_preg, string, len,
5145                   /* start: */ 0, /* range: */ len,
5146                   want_reg_info ? &regs : (struct re_registers *) 0);
5147 
5148  /* Copy the register information to the POSIX structure.  */
5149  if (want_reg_info)
5150    {
5151      if (ret >= 0)
5152        {
5153          unsigned r;
5154
5155          for (r = 0; r < nmatch; r++)
5156            {
5157              pmatch[r].rm_so = regs.start[r];
5158              pmatch[r].rm_eo = regs.end[r];
5159            }
5160        }
5161
5162      /* If we needed the temporary register info, free the space now.  */
5163      free (regs.start);
5164      free (regs.end);
5165    }
5166
5167  /* We want zero return to mean success, unlike `re_search'.  */
5168  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5169}
5170
5171
5172/* Returns a message corresponding to an error code, ERRCODE, returned
5173   from either regcomp or regexec.   We don't use PREG here.  */
5174
5175size_t
5176regerror (errcode, preg, errbuf, errbuf_size)
5177    int errcode;
5178    const regex_t *preg;
5179    char *errbuf;
5180    size_t errbuf_size;
5181{
5182  const char *msg;
5183  size_t msg_size;
5184
5185  if (errcode < 0
5186      || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
5187    /* Only error codes returned by the rest of the code should be passed
5188       to this routine.  If we are given anything else, or if other regex
5189       code generates an invalid error code, then the program has a bug.
5190       Dump core so we can fix it.  */
5191    abort ();
5192
5193  msg = re_error_msg[errcode];
5194
5195  /* POSIX doesn't require that we do anything in this case, but why
5196     not be nice.  */
5197  if (! msg)
5198    msg = "Success";
5199
5200  msg_size = strlen (msg) + 1; /* Includes the null.  */
5201 
5202  if (errbuf_size != 0)
5203    {
5204      if (msg_size > errbuf_size)
5205        {
5206          strncpy (errbuf, msg, errbuf_size - 1);
5207          errbuf[errbuf_size - 1] = 0;
5208        }
5209      else
5210        strcpy (errbuf, msg);
5211    }
5212
5213  return msg_size;
5214}
5215
5216
5217/* Free dynamically allocated space used by PREG.  */
5218
5219void
5220regfree (preg)
5221    regex_t *preg;
5222{
5223  if (preg->buffer != NULL)
5224    free (preg->buffer);
5225  preg->buffer = NULL;
5226 
5227  preg->allocated = 0;
5228  preg->used = 0;
5229
5230  if (preg->fastmap != NULL)
5231    free (preg->fastmap);
5232  preg->fastmap = NULL;
5233  preg->fastmap_accurate = 0;
5234
5235  if (preg->translate != NULL)
5236    free (preg->translate);
5237  preg->translate = NULL;
5238}
5239
5240#endif /* not emacs  */
5241
5242/*
5243Local variables:
5244make-backup-files: t
5245version-control: t
5246trim-versions-without-asking: nil
5247End:
5248*/
Note: See TracBrowser for help on using the repository browser.