source: trunk/third/pcre/pcre.c @ 19309

Revision 19309, 149.6 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r19308, which included commits to RCS files with non-trunk default branches.
Line 
1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/*
6This is a library of functions to support regular expressions whose syntax
7and semantics are as close as possible to those of the Perl 5 language. See
8the file Tech.Notes for some information on the internals.
9
10Written by: Philip Hazel <ph10@cam.ac.uk>
11
12           Copyright (c) 1997-2001 University of Cambridge
13
14-----------------------------------------------------------------------------
15Permission is granted to anyone to use this software for any purpose on any
16computer system, and to redistribute it freely, subject to the following
17restrictions:
18
191. This software is distributed in the hope that it will be useful,
20   but WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22
232. The origin of this software must not be misrepresented, either by
24   explicit claim or by omission.
25
263. Altered versions must be plainly marked as such, and must not be
27   misrepresented as being the original software.
28
294. If PCRE is embedded in any software that is released under the GNU
30   General Purpose Licence (GPL), then the terms of that licence shall
31   supersede any condition above with which it is incompatible.
32-----------------------------------------------------------------------------
33*/
34
35
36/* Define DEBUG to get debugging output on stdout. */
37
38/* #define DEBUG */
39
40/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41inline, and there are *still* stupid compilers about that don't like indented
42pre-processor statements. I suppose it's only been 10 years... */
43
/* DPRINTF takes one fully parenthesized printf() argument list, for example
DPRINTF(("count=%d", n)). It expands to a printf() call when DEBUG is defined
and to nothing at all otherwise. */

#if !defined(DEBUG)
#define DPRINTF(p)
#else
#define DPRINTF(p) printf p
#endif
49
50/* Include the internals header, which itself includes Standard C headers plus
51the external pcre header. */
52
53#include "internal.h"
54
55
56/* Allow compilation as C++ source code, should anybody want to do that. */
57
/* This file uses "class" as an ordinary identifier (for example the local
vector in compile_branch), so rename it when compiling as C++. */

#if defined(__cplusplus)
#define class pcre_class
#endif
61
62
/* Capacity of the nested bracket stacks used at compile time. The limit covers
the nesting of every kind of parenthesis; un-nested, non-capturing parentheses
are not limited by it. It can be raised if necessary - it dimensions one int
vector and one unsigned char vector at compile time. */

#define BRASTACK_SIZE 200


/* Number of bytes in a literal character string above which no more can be
added. The limit is lower when UTF-8 characters may be encountered. */

#ifndef SUPPORT_UTF8
#define MAXLIT 255
#else
#define MAXLIT 250
#endif
80
81
/* Minimum and maximum repeat counts for the six common repeats. A maximum of
zero stands for "no upper limit". The index order presumably follows the
*, *?, +, +?, ?, ?? sequence of the repeat opcodes - confirm against the OP_
definitions in internal.h before changing it. */

static const char rep_min[] = {
  0, 0,     /* entries 0 and 1 */
  1, 1,     /* entries 2 and 3 */
  0, 0      /* entries 4 and 5 */
};

static const char rep_max[] = {
  0, 0,     /* entries 0 and 1: unlimited */
  0, 0,     /* entries 2 and 3: unlimited */
  1, 1      /* entries 4 and 5 */
};
86
87/* Text forms of OP_ values and things, for debugging (not all used) */
88
#if defined(DEBUG)

/* Printable names for the OP_ values, used only by debugging output. The
entries are positional, so the order presumably has to match the OP_
definitions in internal.h - confirm there before inserting or moving a name. */

static const char *OP_names[] = {
  "End",
  "\\A", "\\B", "\\b", "\\D", "\\d", "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",

  /* Four runs of repeat names; the last run has only two "{" entries */
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",

  "class", "Ref", "Recurse",
  "Alt", "Ket", "KetRmax", "KetRmin",
  "Assert", "Assert not", "AssertB", "AssertB not",
  "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Branumber", "Bra"
};

#endif
104
105/* Table for handling escaped characters in the range '0'-'z'. Positive returns
106are simple data values; negative values are for special things like \d and so
107on. Zero means further processing is needed (for things like \x), or the escape
108is invalid. */
109
110static const short int escapes[] = {
111    0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
112    0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
113  '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
114    0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
115    0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
116    0,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
117  '`',      7, -ESC_b,      0, -ESC_d,  ESC_E,  ESC_F,      0,   /* ` - g */
118    0,      0,      0,      0,      0,      0,  ESC_N,      0,   /* h - o */
119    0,      0,  ESC_R, -ESC_s,  ESC_T,      0,      0, -ESC_w,   /* p - w */
120    0,      0, -ESC_z                                            /* x - z */
121};
122
123/* Tables of names of POSIX character classes and their lengths. The list is
124terminated by a zero length entry. The first three must be alpha, upper, lower,
125as this is assumed for handling case independence. */
126
127static const char *posix_names[] = {
128  "alpha", "lower", "upper",
129  "alnum", "ascii", "cntrl", "digit", "graph",
130  "print", "punct", "space", "word",  "xdigit" };
131
132static const uschar posix_name_lengths[] = {
133  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
134
135/* Table of class bit maps for each POSIX class; up to three may be combined
136to form the class. */
137
138static const int posix_class_maps[] = {
139  cbit_lower, cbit_upper, -1,             /* alpha */
140  cbit_lower, -1,         -1,             /* lower */
141  cbit_upper, -1,         -1,             /* upper */
142  cbit_digit, cbit_lower, cbit_upper,     /* alnum */
143  cbit_print, cbit_cntrl, -1,             /* ascii */
144  cbit_cntrl, -1,         -1,             /* cntrl */
145  cbit_digit, -1,         -1,             /* digit */
146  cbit_graph, -1,         -1,             /* graph */
147  cbit_print, -1,         -1,             /* print */
148  cbit_punct, -1,         -1,             /* punct */
149  cbit_space, -1,         -1,             /* space */
150  cbit_word,  -1,         -1,             /* word */
151  cbit_xdigit,-1,         -1              /* xdigit */
152};
153
154
155/* Definition to allow mutual recursion */
156
157static BOOL
158  compile_regex(int, int, int *, uschar **, const uschar **, const char **,
159    BOOL, int, int *, int *, compile_data *);
160
161/* Structure for building a chain of data that actually lives on the
162stack, for holding the values of the subject pointer at the start of each
163subpattern, so as to detect when an empty string has been matched by a
164subpattern - to break infinite loops. */
165
166typedef struct eptrblock {
167  struct eptrblock *prev;
168  const uschar *saved_eptr;
169} eptrblock;
170
/* Flag bits for the match() function; each flag is a distinct single bit. */

#define match_condassert   (1 << 0)   /* Called to check a condition assertion */
#define match_isgroup      (1 << 1)   /* Set if start of bracketed group */
175
176
177
178/*************************************************
179*               Global variables                 *
180*************************************************/
181
/* PCRE is thread-clean and doesn't use any global variables in the normal
sense. However, it calls its memory allocation and free functions through the
two pointers below. They start out as malloc() and free(), can be changed by
the caller, and are shared between all threads. */

void  (*pcre_free)(void *) = &free;
void *(*pcre_malloc)(size_t) = &malloc;
189
190
191
192/*************************************************
193*    Macros and tables for character handling    *
194*************************************************/
195
/* When UTF-8 encoding is being used, a character is no longer just a single
byte. The character handling macros expand to simple sequences in a byte-only
build, and to more complicated ones that decode multi-byte characters when
SUPPORT_UTF8 is defined. The UTF-8 forms test md->utf8, so a variable of that
name must be in scope wherever they are used. Every expansion completes its
own statement(s); none of them is an expression. */

#ifdef SUPPORT_UTF8

/* Get the next UTF-8 character, advancing the pointer */

#define GETCHARINC(c, eptr) \
  c = *eptr++; \
  if (md->utf8 && (c & 0xc0) == 0xc0) \
    { \
    int gci_extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gci_shift = 6*gci_extra; \
    c = (c & utf8_table3[gci_extra]) << gci_shift; \
    for (; gci_extra > 0; gci_extra--) \
      { \
      gci_shift -= 6; \
      c |= (*eptr++ & 0x3f) << gci_shift; \
      } \
    }

/* Get the next UTF-8 character, not advancing the pointer, setting length */

#define GETCHARLEN(c, eptr, len) \
  c = *eptr; \
  len = 1; \
  if (md->utf8 && (c & 0xc0) == 0xc0) \
    { \
    int gcl_extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcl_shift = 6*gcl_extra; \
    int gcl_index = 0; \
    c = (c & utf8_table3[gcl_extra]) << gcl_shift; \
    while (++gcl_index <= gcl_extra) \
      { \
      gcl_shift -= 6; \
      c |= (eptr[gcl_index] & 0x3f) << gcl_shift; \
      } \
    len += gcl_extra; \
    }

/* If the pointer is not at the start of a character, move it back until
it is. */

#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;

#else   /* SUPPORT_UTF8 not defined: one byte is one character */

#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARLEN(c, eptr, len) c = *eptr;
#define BACKCHAR(eptr)

#endif
248
249
250
251/*************************************************
252*             Default character tables           *
253*************************************************/
254
255/* A default set of character tables is included in the PCRE binary. Its source
256is built by the maketables auxiliary program, which uses the default C ctypes
257functions, and put in the file chartables.c. These tables are used by PCRE
258whenever the caller of pcre_compile() does not provide an alternate set of
259tables. */
260
261#include "chartables.c"
262
263
264
265#ifdef SUPPORT_UTF8
266/*************************************************
267*           Tables for UTF-8 support             *
268*************************************************/
269
270/* These are the breakpoints for different numbers of bytes in a UTF-8
271character. */
272
273static int utf8_table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
274
275/* These are the indicator bits and the mask for the data bits to set in the
276first byte of a character, indexed by the number of additional bytes. */
277
278static int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
279static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
280
281/* Table of the number of extra characters, indexed by the first character
282masked with 0x3f. The highest number for a valid UTF-8 character is in fact
2830x3d. */
284
285static uschar utf8_table4[] = {
286  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
287  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
288  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
289  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
290
291
292/*************************************************
293*       Convert character value to UTF-8         *
294*************************************************/
295
296/* This function takes an integer value in the range 0 - 0x7fffffff
297and encodes it as a UTF-8 character in 0 to 6 bytes.
298
299Arguments:
300  cvalue     the character value
301  buffer     pointer to buffer for result - at least 6 bytes long
302
303Returns:     number of characters placed in the buffer
304*/
305
306static int
307ord2utf8(int cvalue, uschar *buffer)
308{
309register int i, j;
310for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
311  if (cvalue <= utf8_table1[i]) break;
312buffer += i;
313for (j = i; j > 0; j--)
314 {
315 *buffer-- = 0x80 | (cvalue & 0x3f);
316 cvalue >>= 6;
317 }
318*buffer = utf8_table2[i] | cvalue;
319return i + 1;
320}
321#endif
322
323
324
325/*************************************************
326*          Return version string                 *
327*************************************************/
328
/* Two-level stringizing: XSTRING lets its argument be macro-expanded before
STRING turns the result into a string literal. */

#define STRING(a)  #a
#define XSTRING(s) STRING(s)

/* Return the version as a string with static storage duration, in the form
"<PCRE_MAJOR>.<PCRE_MINOR> <PCRE_DATE>". */

const char *
pcre_version(void)
{
static const char version_text[] =
  XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
return version_text;
}
337
338
339
340
341/*************************************************
342* (Obsolete) Return info about compiled pattern  *
343*************************************************/
344
345/* This is the original "info" function. It picks potentially useful data out
346of the private structure, but its interface was too rigid. It remains for
347backwards compatibility. The public options are passed back in an int - though
348the re->options field has been expanded to a long int, all the public options
349at the low end of it, and so even on 16-bit systems this will still be OK.
350Therefore, I haven't changed the API for pcre_info().
351
352Arguments:
353  external_re   points to compiled code
354  optptr        where to pass back the options
355  first_char    where to pass back the first character,
356                or -1 if multiline and all branches start ^,
357                or -2 otherwise
358
359Returns:        number of capturing subpatterns
360                or negative values on error
361*/
362
363int
364pcre_info(const pcre *external_re, int *optptr, int *first_char)
365{
366const real_pcre *re = (const real_pcre *)external_re;
367if (re == NULL) return PCRE_ERROR_NULL;
368if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
369if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
370if (first_char != NULL)
371  *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
372     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
373return re->top_bracket;
374}
375
376
377
378/*************************************************
379*        Return info about compiled pattern      *
380*************************************************/
381
382/* This is a newer "info" function which has an extensible interface so
383that additional items can be added compatibly.
384
385Arguments:
386  external_re      points to compiled code
387  external_study   points to study data, or NULL
388  what             what information is required
389  where            where to put the information
390
391Returns:           0 if data returned, negative on error
392*/
393
394int
395pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what,
396  void *where)
397{
398const real_pcre *re = (const real_pcre *)external_re;
399const real_pcre_extra *study = (const real_pcre_extra *)study_data;
400
401if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
402if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
403
404switch (what)
405  {
406  case PCRE_INFO_OPTIONS:
407  *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
408  break;
409
410  case PCRE_INFO_SIZE:
411  *((size_t *)where) = re->size;
412  break;
413
414  case PCRE_INFO_CAPTURECOUNT:
415  *((int *)where) = re->top_bracket;
416  break;
417
418  case PCRE_INFO_BACKREFMAX:
419  *((int *)where) = re->top_backref;
420  break;
421
422  case PCRE_INFO_FIRSTCHAR:
423  *((int *)where) =
424    ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
425    ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
426  break;
427
428  case PCRE_INFO_FIRSTTABLE:
429  *((const uschar **)where) =
430    (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
431      study->start_bits : NULL;
432  break;
433
434  case PCRE_INFO_LASTLITERAL:
435  *((int *)where) =
436    ((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1;
437  break;
438
439  default: return PCRE_ERROR_BADOPTION;
440  }
441
442return 0;
443}
444
445
446
447#ifdef DEBUG
448/*************************************************
449*        Debugging function to print chars       *
450*************************************************/
451
452/* Print a sequence of chars in printable format, stopping at the end of the
453subject if the requested.
454
455Arguments:
456  p           points to characters
457  length      number to print
458  is_subject  TRUE if printing from within md->start_subject
459  md          pointer to matching data block, if is_subject is TRUE
460
461Returns:     nothing
462*/
463
464static void
465pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
466{
467int c;
468if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
469while (length-- > 0)
470  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
471}
472#endif
473
474
475
476
477/*************************************************
478*            Handle escapes                      *
479*************************************************/
480
481/* This function is called when a \ has been encountered. It either returns a
482positive value for a simple escape such as \n, or a negative value which
483encodes one of the more complicated things such as \d. When UTF-8 is enabled,
484a positive value greater than 255 may be returned. On entry, ptr is pointing at
485the \. On exit, it is on the final character of the escape sequence.
486
487Arguments:
488  ptrptr     points to the pattern position pointer
489  errorptr   points to the pointer to the error message
490  bracount   number of previous extracting brackets
491  options    the options bits
492  isclass    TRUE if inside a character class
493  cd         pointer to char tables block
494
495Returns:     zero or positive => a data character
496             negative => a special escape sequence
497             on error, errorptr is set
498*/
499
500static int
501check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
502  int options, BOOL isclass, compile_data *cd)
503{
504const uschar *ptr = *ptrptr;
505int c, i;
506
507/* If backslash is at the end of the pattern, it's an error. */
508
509c = *(++ptr);
510if (c == 0) *errorptr = ERR1;
511
512/* Digits or letters may have special meaning; all others are literals. */
513
514else if (c < '0' || c > 'z') {}
515
516/* Do an initial lookup in a table. A non-zero result is something that can be
517returned immediately. Otherwise further processing may be required. */
518
519else if ((i = escapes[c - '0']) != 0) c = i;
520
521/* Escapes that need further processing, or are illegal. */
522
523else
524  {
525  const uschar *oldptr;
526  switch (c)
527    {
528    /* The handling of escape sequences consisting of a string of digits
529    starting with one that is not zero is not straightforward. By experiment,
530    the way Perl works seems to be as follows:
531
532    Outside a character class, the digits are read as a decimal number. If the
533    number is less than 10, or if there are that many previous extracting
534    left brackets, then it is a back reference. Otherwise, up to three octal
535    digits are read to form an escaped byte. Thus \123 is likely to be octal
536    123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
537    value is greater than 377, the least significant 8 bits are taken. Inside a
538    character class, \ followed by a digit is always an octal number. */
539
540    case '1': case '2': case '3': case '4': case '5':
541    case '6': case '7': case '8': case '9':
542
543    if (!isclass)
544      {
545      oldptr = ptr;
546      c -= '0';
547      while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
548        c = c * 10 + *(++ptr) - '0';
549      if (c < 10 || c <= bracount)
550        {
551        c = -(ESC_REF + c);
552        break;
553        }
554      ptr = oldptr;      /* Put the pointer back and fall through */
555      }
556
557    /* Handle an octal number following \. If the first digit is 8 or 9, Perl
558    generates a binary zero byte and treats the digit as a following literal.
559    Thus we have to pull back the pointer by one. */
560
561    if ((c = *ptr) >= '8')
562      {
563      ptr--;
564      c = 0;
565      break;
566      }
567
568    /* \0 always starts an octal number, but we may drop through to here with a
569    larger first octal digit. */
570
571    case '0':
572    c -= '0';
573    while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
574      ptr[1] != '8' && ptr[1] != '9')
575        c = c * 8 + *(++ptr) - '0';
576    c &= 255;     /* Take least significant 8 bits */
577    break;
578
579    /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number
580    which can be greater than 0xff, but only if the ddd are hex digits. */
581
582    case 'x':
583#ifdef SUPPORT_UTF8
584    if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)
585      {
586      const uschar *pt = ptr + 2;
587      register int count = 0;
588      c = 0;
589      while ((cd->ctypes[*pt] & ctype_xdigit) != 0)
590        {
591        count++;
592        c = c * 16 + cd->lcc[*pt] -
593          (((cd->ctypes[*pt] & ctype_digit) != 0)? '0' : 'W');
594        pt++;
595        }
596      if (*pt == '}')
597        {
598        if (c < 0 || count > 8) *errorptr = ERR34;
599        ptr = pt;
600        break;
601        }
602      /* If the sequence of hex digits does not end with '}', then we don't
603      recognize this construct; fall through to the normal \x handling. */
604      }
605#endif
606
607    /* Read just a single hex char */
608
609    c = 0;
610    while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
611      {
612      ptr++;
613      c = c * 16 + cd->lcc[*ptr] -
614        (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
615      }
616    break;
617
618    /* Other special escapes not starting with a digit are straightforward */
619
620    case 'c':
621    c = *(++ptr);
622    if (c == 0)
623      {
624      *errorptr = ERR2;
625      return 0;
626      }
627
628    /* A letter is upper-cased; then the 0x40 bit is flipped */
629
630    if (c >= 'a' && c <= 'z') c = cd->fcc[c];
631    c ^= 0x40;
632    break;
633
634    /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
635    other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
636    for Perl compatibility, it is a literal. This code looks a bit odd, but
637    there used to be some cases other than the default, and there may be again
638    in future, so I haven't "optimized" it. */
639
640    default:
641    if ((options & PCRE_EXTRA) != 0) switch(c)
642      {
643      default:
644      *errorptr = ERR3;
645      break;
646      }
647    break;
648    }
649  }
650
651*ptrptr = ptr;
652return c;
653}
654
655
656
657/*************************************************
658*            Check for counted repeat            *
659*************************************************/
660
661/* This function is called when a '{' is encountered in a place where it might
662start a quantifier. It looks ahead to see if it really is a quantifier or not.
663It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
664where the ddds are digits.
665
666Arguments:
667  p         pointer to the first char after '{'
668  cd        pointer to char tables block
669
670Returns:    TRUE or FALSE
671*/
672
673static BOOL
674is_counted_repeat(const uschar *p, compile_data *cd)
675{
676if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
677while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
678if (*p == '}') return TRUE;
679
680if (*p++ != ',') return FALSE;
681if (*p == '}') return TRUE;
682
683if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
684while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
685return (*p == '}');
686}
687
688
689
690/*************************************************
691*         Read repeat counts                     *
692*************************************************/
693
694/* Read an item of the form {n,m} and return the values. This is called only
695after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
696so the syntax is guaranteed to be correct, but we need to check the values.
697
698Arguments:
699  p          pointer to first char after '{'
700  minp       pointer to int for min
701  maxp       pointer to int for max
702             returned as -1 if no max
703  errorptr   points to pointer to error message
704  cd         pointer to character tables clock
705
706Returns:     pointer to '}' on success;
707             current ptr on error, with errorptr set
708*/
709
710static const uschar *
711read_repeat_counts(const uschar *p, int *minp, int *maxp,
712  const char **errorptr, compile_data *cd)
713{
714int min = 0;
715int max = -1;
716
717while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
718
719if (*p == '}') max = min; else
720  {
721  if (*(++p) != '}')
722    {
723    max = 0;
724    while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
725    if (max < min)
726      {
727      *errorptr = ERR4;
728      return p;
729      }
730    }
731  }
732
733/* Do paranoid checks, then fill in the required variables, and pass back the
734pointer to the terminating '}'. */
735
736if (min > 65535 || max > 65535)
737  *errorptr = ERR5;
738else
739  {
740  *minp = min;
741  *maxp = max;
742  }
743return p;
744}
745
746
747
748/*************************************************
749*        Find the fixed length of a pattern      *
750*************************************************/
751
752/* Scan a pattern and compute the fixed length of subject that will match it,
753if the length is fixed. This is needed for dealing with backward assertions.
754
755Arguments:
756  code     points to the start of the pattern (the bracket)
757  options  the compiling options
758
759Returns:   the fixed length, or -1 if there is no fixed length
760*/
761
762static int
763find_fixedlength(uschar *code, int options)
764{
765int length = -1;
766
767register int branchlength = 0;
768register uschar *cc = code + 3;
769
770/* Scan along the opcodes for this branch. If we get to the end of the
771branch, check the length against that of the other branches. */
772
773for (;;)
774  {
775  int d;
776  register int op = *cc;
777  if (op >= OP_BRA) op = OP_BRA;
778
779  switch (op)
780    {
781    case OP_BRA:
782    case OP_ONCE:
783    case OP_COND:
784    d = find_fixedlength(cc, options);
785    if (d < 0) return -1;
786    branchlength += d;
787    do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
788    cc += 3;
789    break;
790
791    /* Reached end of a branch; if it's a ket it is the end of a nested
792    call. If it's ALT it is an alternation in a nested call. If it is
793    END it's the end of the outer call. All can be handled by the same code. */
794
795    case OP_ALT:
796    case OP_KET:
797    case OP_KETRMAX:
798    case OP_KETRMIN:
799    case OP_END:
800    if (length < 0) length = branchlength;
801      else if (length != branchlength) return -1;
802    if (*cc != OP_ALT) return length;
803    cc += 3;
804    branchlength = 0;
805    break;
806
807    /* Skip over assertive subpatterns */
808
809    case OP_ASSERT:
810    case OP_ASSERT_NOT:
811    case OP_ASSERTBACK:
812    case OP_ASSERTBACK_NOT:
813    do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
814    cc += 3;
815    break;
816
817    /* Skip over things that don't match chars */
818
819    case OP_REVERSE:
820    case OP_BRANUMBER:
821    case OP_CREF:
822    cc++;
823    /* Fall through */
824
825    case OP_OPT:
826    cc++;
827    /* Fall through */
828
829    case OP_SOD:
830    case OP_EOD:
831    case OP_EODN:
832    case OP_CIRC:
833    case OP_DOLL:
834    case OP_NOT_WORD_BOUNDARY:
835    case OP_WORD_BOUNDARY:
836    cc++;
837    break;
838
839    /* Handle char strings. In UTF-8 mode we must count characters, not bytes.
840    This requires a scan of the string, unfortunately. We assume valid UTF-8
841    strings, so all we do is reduce the length by one for byte whose bits are
842    10xxxxxx. */
843
844    case OP_CHARS:
845    branchlength += *(++cc);
846#ifdef SUPPORT_UTF8
847    for (d = 1; d <= *cc; d++)
848      if ((cc[d] & 0xc0) == 0x80) branchlength--;
849#endif
850    cc += *cc + 1;
851    break;
852
853    /* Handle exact repetitions */
854
855    case OP_EXACT:
856    case OP_TYPEEXACT:
857    branchlength += (cc[1] << 8) + cc[2];
858    cc += 4;
859    break;
860
861    /* Handle single-char matchers */
862
863    case OP_NOT_DIGIT:
864    case OP_DIGIT:
865    case OP_NOT_WHITESPACE:
866    case OP_WHITESPACE:
867    case OP_NOT_WORDCHAR:
868    case OP_WORDCHAR:
869    case OP_ANY:
870    branchlength++;
871    cc++;
872    break;
873
874
875    /* Check a class for variable quantification */
876
877    case OP_CLASS:
878    cc += 33;
879
880    switch (*cc)
881      {
882      case OP_CRSTAR:
883      case OP_CRMINSTAR:
884      case OP_CRQUERY:
885      case OP_CRMINQUERY:
886      return -1;
887
888      case OP_CRRANGE:
889      case OP_CRMINRANGE:
890      if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
891      branchlength += (cc[1] << 8) + cc[2];
892      cc += 5;
893      break;
894
895      default:
896      branchlength++;
897      }
898    break;
899
900    /* Anything else is variable length */
901
902    default:
903    return -1;
904    }
905  }
906/* Control never gets here */
907}
908
909
910
911
912/*************************************************
913*           Check for POSIX class syntax         *
914*************************************************/
915
916/* This function is called when the sequence "[:" or "[." or "[=" is
917encountered in a character class. It checks whether this is followed by an
918optional ^ and then a sequence of letters, terminated by a matching ":]" or
919".]" or "=]".
920
921Argument:
922  ptr      pointer to the initial [
923  endptr   where to return the end pointer
924  cd       pointer to compile data
925
926Returns:   TRUE or FALSE
927*/
928
929static BOOL
930check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)
931{
932int terminator;          /* Don't combine these lines; the Solaris cc */
933terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
934if (*(++ptr) == '^') ptr++;
935while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
936if (*ptr == terminator && ptr[1] == ']')
937  {
938  *endptr = ptr;
939  return TRUE;
940  }
941return FALSE;
942}
943
944
945
946
947/*************************************************
948*          Check POSIX class name                *
949*************************************************/
950
951/* This function is called to check the name given in a POSIX-style class entry
952such as [:alnum:].
953
954Arguments:
955  ptr        points to the first letter
956  len        the length of the name
957
958Returns:     a value representing the name, or -1 if unknown
959*/
960
961static int
962check_posix_name(const uschar *ptr, int len)
963{
964register int yield = 0;
965while (posix_name_lengths[yield] != 0)
966  {
967  if (len == posix_name_lengths[yield] &&
968    strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
969  yield++;
970  }
971return -1;
972}
973
974
975
976
977/*************************************************
978*           Compile one branch                   *
979*************************************************/
980
981/* Scan the pattern, compiling it into the code vector.
982
983Arguments:
984  options      the option bits
985  brackets     points to number of extracting brackets used
986  code         points to the pointer to the current code point
987  ptrptr       points to the current pattern pointer
988  errorptr     points to pointer to error message
989  optchanged   set to the value of the last OP_OPT item compiled
990  reqchar      set to the last literal character required, else -1
991  countlits    set to count of mandatory literal characters
992  cd           contains pointers to tables
993
994Returns:       TRUE on success
995               FALSE, with *errorptr set on error
996*/
997
998static BOOL
999compile_branch(int options, int *brackets, uschar **codeptr,
1000  const uschar **ptrptr, const char **errorptr, int *optchanged,
1001  int *reqchar, int *countlits, compile_data *cd)
1002{
1003int repeat_type, op_type;
1004int repeat_min, repeat_max;
1005int bravalue, length;
1006int greedy_default, greedy_non_default;
1007int prevreqchar;
1008int condcount = 0;
1009int subcountlits = 0;
1010register int c;
1011register uschar *code = *codeptr;
1012uschar *tempcode;
1013const uschar *ptr = *ptrptr;
1014const uschar *tempptr;
1015uschar *previous = NULL;
1016uschar class[32];
1017
1018/* Set up the default and non-default settings for greediness */
1019
1020greedy_default = ((options & PCRE_UNGREEDY) != 0);
1021greedy_non_default = greedy_default ^ 1;
1022
1023/* Initialize no required char, and count of literals */
1024
1025*reqchar = prevreqchar = -1;
1026*countlits = 0;
1027
1028/* Switch on next character until the end of the branch */
1029
1030for (;; ptr++)
1031  {
1032  BOOL negate_class;
1033  int class_charcount;
1034  int class_lastchar;
1035  int newoptions;
1036  int skipbytes;
1037  int subreqchar;
1038
1039  c = *ptr;
1040  if ((options & PCRE_EXTENDED) != 0)
1041    {
1042    if ((cd->ctypes[c] & ctype_space) != 0) continue;
1043    if (c == '#')
1044      {
1045      /* The space before the ; is to avoid a warning on a silly compiler
1046      on the Macintosh. */
1047      while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
1048      continue;
1049      }
1050    }
1051
1052  switch(c)
1053    {
1054    /* The branch terminates at end of string, |, or ). */
1055
1056    case 0:
1057    case '|':
1058    case ')':
1059    *codeptr = code;
1060    *ptrptr = ptr;
1061    return TRUE;
1062
1063    /* Handle single-character metacharacters */
1064
1065    case '^':
1066    previous = NULL;
1067    *code++ = OP_CIRC;
1068    break;
1069
1070    case '$':
1071    previous = NULL;
1072    *code++ = OP_DOLL;
1073    break;
1074
1075    case '.':
1076    previous = code;
1077    *code++ = OP_ANY;
1078    break;
1079
1080    /* Character classes. These always build a 32-byte bitmap of the permitted
1081    characters, except in the special case where there is only one character.
1082    For negated classes, we build the map as usual, then invert it at the end.
1083    */
1084
1085    case '[':
1086    previous = code;
1087    *code++ = OP_CLASS;
1088
1089    /* If the first character is '^', set the negation flag and skip it. */
1090
1091    if ((c = *(++ptr)) == '^')
1092      {
1093      negate_class = TRUE;
1094      c = *(++ptr);
1095      }
1096    else negate_class = FALSE;
1097
1098    /* Keep a count of chars so that we can optimize the case of just a single
1099    character. */
1100
1101    class_charcount = 0;
1102    class_lastchar = -1;
1103
1104    /* Initialize the 32-char bit map to all zeros. We have to build the
1105    map in a temporary bit of store, in case the class contains only 1
1106    character, because in that case the compiled code doesn't use the
1107    bit map. */
1108
1109    memset(class, 0, 32 * sizeof(uschar));
1110
1111    /* Process characters until ] is reached. By writing this as a "do" it
1112    means that an initial ] is taken as a data character. */
1113
1114    do
1115      {
1116      if (c == 0)
1117        {
1118        *errorptr = ERR6;
1119        goto FAILED;
1120        }
1121
1122      /* Handle POSIX class names. Perl allows a negation extension of the
1123      form [:^name]. A square bracket that doesn't match the syntax is
1124      treated as a literal. We also recognize the POSIX constructions
1125      [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
1126      5.6 does. */
1127
1128      if (c == '[' &&
1129          (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
1130          check_posix_syntax(ptr, &tempptr, cd))
1131        {
1132        BOOL local_negate = FALSE;
1133        int posix_class, i;
1134        register const uschar *cbits = cd->cbits;
1135
1136        if (ptr[1] != ':')
1137          {
1138          *errorptr = ERR31;
1139          goto FAILED;
1140          }
1141
1142        ptr += 2;
1143        if (*ptr == '^')
1144          {
1145          local_negate = TRUE;
1146          ptr++;
1147          }
1148
1149        posix_class = check_posix_name(ptr, tempptr - ptr);
1150        if (posix_class < 0)
1151          {
1152          *errorptr = ERR30;
1153          goto FAILED;
1154          }
1155
1156        /* If matching is caseless, upper and lower are converted to
1157        alpha. This relies on the fact that the class table starts with
1158        alpha, lower, upper as the first 3 entries. */
1159
1160        if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
1161          posix_class = 0;
1162
1163        /* Or into the map we are building up to 3 of the static class
1164        tables, or their negations. */
1165
1166        posix_class *= 3;
1167        for (i = 0; i < 3; i++)
1168          {
1169          int taboffset = posix_class_maps[posix_class + i];
1170          if (taboffset < 0) break;
1171          if (local_negate)
1172            for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset];
1173          else
1174            for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset];
1175          }
1176
1177        ptr = tempptr + 1;
1178        class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
1179        continue;
1180        }
1181
1182      /* Backslash may introduce a single character, or it may introduce one
1183      of the specials, which just set a flag. Escaped items are checked for
1184      validity in the pre-compiling pass. The sequence \b is a special case.
1185      Inside a class (and only there) it is treated as backspace. Elsewhere
1186      it marks a word boundary. Other escapes have preset maps ready to
1187      or into the one we are building. We assume they have more than one
      character in them, so set class_charcount bigger than one. */
1189
1190      if (c == '\\')
1191        {
1192        c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1193        if (-c == ESC_b) c = '\b';
1194        else if (c < 0)
1195          {
1196          register const uschar *cbits = cd->cbits;
1197          class_charcount = 10;
1198          switch (-c)
1199            {
1200            case ESC_d:
1201            for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
1202            continue;
1203
1204            case ESC_D:
1205            for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
1206            continue;
1207
1208            case ESC_w:
1209            for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word];
1210            continue;
1211
1212            case ESC_W:
1213            for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word];
1214            continue;
1215
1216            case ESC_s:
1217            for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
1218            continue;
1219
1220            case ESC_S:
1221            for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
1222            continue;
1223
1224            default:
1225            *errorptr = ERR7;
1226            goto FAILED;
1227            }
1228          }
1229
1230        /* Fall through if single character, but don't at present allow
1231        chars > 255 in UTF-8 mode. */
1232
1233#ifdef SUPPORT_UTF8
1234        if (c > 255)
1235          {
1236          *errorptr = ERR33;
1237          goto FAILED;
1238          }
1239#endif
1240        }
1241
1242      /* A single character may be followed by '-' to form a range. However,
1243      Perl does not permit ']' to be the end of the range. A '-' character
1244      here is treated as a literal. */
1245
1246      if (ptr[1] == '-' && ptr[2] != ']')
1247        {
1248        int d;
1249        ptr += 2;
1250        d = *ptr;
1251
1252        if (d == 0)
1253          {
1254          *errorptr = ERR6;
1255          goto FAILED;
1256          }
1257
1258        /* The second part of a range can be a single-character escape, but
1259        not any of the other escapes. Perl 5.6 treats a hyphen as a literal
1260        in such circumstances. */
1261
1262        if (d == '\\')
1263          {
1264          const uschar *oldptr = ptr;
1265          d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
1266
1267#ifdef SUPPORT_UTF8
1268          if (d > 255)
1269            {
1270            *errorptr = ERR33;
1271            goto FAILED;
1272            }
1273#endif
          /* \b is backspace; any other special means the '-' was literal */
1275
1276          if (d < 0)
1277            {
1278            if (d == -ESC_b) d = '\b'; else
1279              {
1280              ptr = oldptr - 2;
1281              goto SINGLE_CHARACTER;  /* A few lines below */
1282              }
1283            }
1284          }
1285
1286        if (d < c)
1287          {
1288          *errorptr = ERR8;
1289          goto FAILED;
1290          }
1291
1292        for (; c <= d; c++)
1293          {
1294          class[c/8] |= (1 << (c&7));
1295          if ((options & PCRE_CASELESS) != 0)
1296            {
1297            int uc = cd->fcc[c];           /* flip case */
1298            class[uc/8] |= (1 << (uc&7));
1299            }
1300          class_charcount++;                /* in case a one-char range */
1301          class_lastchar = c;
1302          }
1303        continue;   /* Go get the next char in the class */
1304        }
1305
1306      /* Handle a lone single character - we can get here for a normal
1307      non-escape char, or after \ that introduces a single character. */
1308
1309      SINGLE_CHARACTER:
1310
1311      class [c/8] |= (1 << (c&7));
1312      if ((options & PCRE_CASELESS) != 0)
1313        {
1314        c = cd->fcc[c];   /* flip case */
1315        class[c/8] |= (1 << (c&7));
1316        }
1317      class_charcount++;
1318      class_lastchar = c;
1319      }
1320
1321    /* Loop until ']' reached; the check for end of string happens inside the
1322    loop. This "while" is the end of the "do" above. */
1323
1324    while ((c = *(++ptr)) != ']');
1325
1326    /* If class_charcount is 1 and class_lastchar is not negative, we saw
1327    precisely one character. This doesn't need the whole 32-byte bit map.
    We turn it into a 1-character OP_CHARS item if the class is not negated, or
    into OP_NOT if it is negated. */
1330
1331    if (class_charcount == 1 && class_lastchar >= 0)
1332      {
1333      if (negate_class)
1334        {
1335        code[-1] = OP_NOT;
1336        }
1337      else
1338        {
1339        code[-1] = OP_CHARS;
1340        *code++ = 1;
1341        }
1342      *code++ = class_lastchar;
1343      }
1344
1345    /* Otherwise, negate the 32-byte map if necessary, and copy it into
1346    the code vector. */
1347
1348    else
1349      {
1350      if (negate_class)
1351        for (c = 0; c < 32; c++) code[c] = ~class[c];
1352      else
1353        memcpy(code, class, 32);
1354      code += 32;
1355      }
1356    break;
1357
1358    /* Various kinds of repeat */
1359
1360    case '{':
1361    if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
1362    ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
1363    if (*errorptr != NULL) goto FAILED;
1364    goto REPEAT;
1365
1366    case '*':
1367    repeat_min = 0;
1368    repeat_max = -1;
1369    goto REPEAT;
1370
1371    case '+':
1372    repeat_min = 1;
1373    repeat_max = -1;
1374    goto REPEAT;
1375
1376    case '?':
1377    repeat_min = 0;
1378    repeat_max = 1;
1379
1380    REPEAT:
1381    if (previous == NULL)
1382      {
1383      *errorptr = ERR9;
1384      goto FAILED;
1385      }
1386
1387    /* If the next character is '?' this is a minimizing repeat, by default,
1388    but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
1389    next character. */
1390
1391    if (ptr[1] == '?')
1392      { repeat_type = greedy_non_default; ptr++; }
1393    else repeat_type = greedy_default;
1394
1395    /* If previous was a string of characters, chop off the last one and use it
1396    as the subject of the repeat. If there was only one character, we can
1397    abolish the previous item altogether. A repeat with a zero minimum wipes
1398    out any reqchar setting, backing up to the previous value. We must also
1399    adjust the countlits value. */
1400
1401    if (*previous == OP_CHARS)
1402      {
1403      int len = previous[1];
1404
1405      if (repeat_min == 0) *reqchar = prevreqchar;
1406      *countlits += repeat_min - 1;
1407
1408      if (len == 1)
1409        {
1410        c = previous[2];
1411        code = previous;
1412        }
1413      else
1414        {
1415        c = previous[len+1];
1416        previous[1]--;
1417        code--;
1418        }
1419      op_type = 0;                 /* Use single-char op codes */
1420      goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
1421      }
1422
1423    /* If previous was a single negated character ([^a] or similar), we use
1424    one of the special opcodes, replacing it. The code is shared with single-
1425    character repeats by adding a suitable offset into repeat_type. */
1426
1427    else if ((int)*previous == OP_NOT)
1428      {
1429      op_type = OP_NOTSTAR - OP_STAR;  /* Use "not" opcodes */
1430      c = previous[1];
1431      code = previous;
1432      goto OUTPUT_SINGLE_REPEAT;
1433      }
1434
1435    /* If previous was a character type match (\d or similar), abolish it and
1436    create a suitable repeat item. The code is shared with single-character
1437    repeats by adding a suitable offset into repeat_type. */
1438
1439    else if ((int)*previous < OP_EODN || *previous == OP_ANY)
1440      {
1441      op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
1442      c = *previous;
1443      code = previous;
1444
1445      OUTPUT_SINGLE_REPEAT:
1446
1447      /* If the maximum is zero then the minimum must also be zero; Perl allows
1448      this case, so we do too - by simply omitting the item altogether. */
1449
1450      if (repeat_max == 0) goto END_REPEAT;
1451
1452      /* Combine the op_type with the repeat_type */
1453
1454      repeat_type += op_type;
1455
1456      /* A minimum of zero is handled either as the special case * or ?, or as
1457      an UPTO, with the maximum given. */
1458
1459      if (repeat_min == 0)
1460        {
1461        if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
1462          else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
1463        else
1464          {
1465          *code++ = OP_UPTO + repeat_type;
1466          *code++ = repeat_max >> 8;
1467          *code++ = (repeat_max & 255);
1468          }
1469        }
1470
1471      /* The case {1,} is handled as the special case + */
1472
1473      else if (repeat_min == 1 && repeat_max == -1)
1474        *code++ = OP_PLUS + repeat_type;
1475
1476      /* The case {n,n} is just an EXACT, while the general case {n,m} is
1477      handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
1478
1479      else
1480        {
1481        if (repeat_min != 1)
1482          {
1483          *code++ = OP_EXACT + op_type;  /* NB EXACT doesn't have repeat_type */
1484          *code++ = repeat_min >> 8;
1485          *code++ = (repeat_min & 255);
1486          }
1487
        /* If the minimum is 1 and the previous item was a character string,
1489        we either have to put back the item that got cancelled if the string
1490        length was 1, or add the character back onto the end of a longer
1491        string. For a character type nothing need be done; it will just get
1492        put back naturally. Note that the final character is always going to
1493        get added below. */
1494
1495        else if (*previous == OP_CHARS)
1496          {
1497          if (code == previous) code += 2; else previous[1]++;
1498          }
1499
1500        /*  For a single negated character we also have to put back the
1501        item that got cancelled. */
1502
1503        else if (*previous == OP_NOT) code++;
1504
1505        /* If the maximum is unlimited, insert an OP_STAR. */
1506
1507        if (repeat_max < 0)
1508          {
1509          *code++ = c;
1510          *code++ = OP_STAR + repeat_type;
1511          }
1512
1513        /* Else insert an UPTO if the max is greater than the min. */
1514
1515        else if (repeat_max != repeat_min)
1516          {
1517          *code++ = c;
1518          repeat_max -= repeat_min;
1519          *code++ = OP_UPTO + repeat_type;
1520          *code++ = repeat_max >> 8;
1521          *code++ = (repeat_max & 255);
1522          }
1523        }
1524
1525      /* The character or character type itself comes last in all cases. */
1526
1527      *code++ = c;
1528      }
1529
1530    /* If previous was a character class or a back reference, we put the repeat
1531    stuff after it, but just skip the item if the repeat was {0,0}. */
1532
1533    else if (*previous == OP_CLASS || *previous == OP_REF)
1534      {
1535      if (repeat_max == 0)
1536        {
1537        code = previous;
1538        goto END_REPEAT;
1539        }
1540      if (repeat_min == 0 && repeat_max == -1)
1541        *code++ = OP_CRSTAR + repeat_type;
1542      else if (repeat_min == 1 && repeat_max == -1)
1543        *code++ = OP_CRPLUS + repeat_type;
1544      else if (repeat_min == 0 && repeat_max == 1)
1545        *code++ = OP_CRQUERY + repeat_type;
1546      else
1547        {
1548        *code++ = OP_CRRANGE + repeat_type;
1549        *code++ = repeat_min >> 8;
1550        *code++ = repeat_min & 255;
1551        if (repeat_max == -1) repeat_max = 0;  /* 2-byte encoding for max */
1552        *code++ = repeat_max >> 8;
1553        *code++ = repeat_max & 255;
1554        }
1555      }
1556
1557    /* If previous was a bracket group, we may have to replicate it in certain
1558    cases. */
1559
1560    else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1561             (int)*previous == OP_COND)
1562      {
1563      register int i;
1564      int ketoffset = 0;
1565      int len = code - previous;
1566      uschar *bralink = NULL;
1567
1568      /* If the maximum repeat count is unlimited, find the end of the bracket
1569      by scanning through from the start, and compute the offset back to it
1570      from the current code pointer. There may be an OP_OPT setting following
1571      the final KET, so we can't find the end just by going back from the code
1572      pointer. */
1573
1574      if (repeat_max == -1)
1575        {
1576        register uschar *ket = previous;
1577        do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1578        ketoffset = code - ket;
1579        }
1580
1581      /* The case of a zero minimum is special because of the need to stick
1582      OP_BRAZERO in front of it, and because the group appears once in the
1583      data, whereas in other cases it appears the minimum number of times. For
1584      this reason, it is simplest to treat this case separately, as otherwise
1585      the code gets far too messy. There are several special subcases when the
1586      minimum is zero. */
1587
1588      if (repeat_min == 0)
1589        {
1590        /* If we set up a required char from the bracket, we must back off
1591        to the previous value and reset the countlits value too. */
1592
1593        if (subcountlits > 0)
1594          {
1595          *reqchar = prevreqchar;
1596          *countlits -= subcountlits;
1597          }
1598
1599        /* If the maximum is also zero, we just omit the group from the output
1600        altogether. */
1601
1602        if (repeat_max == 0)
1603          {
1604          code = previous;
1605          goto END_REPEAT;
1606          }
1607
1608        /* If the maximum is 1 or unlimited, we just have to stick in the
1609        BRAZERO and do no more at this point. */
1610
1611        if (repeat_max <= 1)
1612          {
1613          memmove(previous+1, previous, len);
1614          code++;
1615          *previous++ = OP_BRAZERO + repeat_type;
1616          }
1617
1618        /* If the maximum is greater than 1 and limited, we have to replicate
1619        in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1620        The first one has to be handled carefully because it's the original
1621        copy, which has to be moved up. The remainder can be handled by code
1622        that is common with the non-zero minimum case below. We just have to
        adjust the value of repeat_max, since one less copy is required. */
1624
1625        else
1626          {
1627          int offset;
1628          memmove(previous+4, previous, len);
1629          code += 4;
1630          *previous++ = OP_BRAZERO + repeat_type;
1631          *previous++ = OP_BRA;
1632
1633          /* We chain together the bracket offset fields that have to be
1634          filled in later when the ends of the brackets are reached. */
1635
1636          offset = (bralink == NULL)? 0 : previous - bralink;
1637          bralink = previous;
1638          *previous++ = offset >> 8;
1639          *previous++ = offset & 255;
1640          }
1641
1642        repeat_max--;
1643        }
1644
1645      /* If the minimum is greater than zero, replicate the group as many
1646      times as necessary, and adjust the maximum to the number of subsequent
1647      copies that we need. */
1648
1649      else
1650        {
1651        for (i = 1; i < repeat_min; i++)
1652          {
1653          memcpy(code, previous, len);
1654          code += len;
1655          }
1656        if (repeat_max > 0) repeat_max -= repeat_min;
1657        }
1658
1659      /* This code is common to both the zero and non-zero minimum cases. If
1660      the maximum is limited, it replicates the group in a nested fashion,
1661      remembering the bracket starts on a stack. In the case of a zero minimum,
1662      the first one was set up above. In all cases the repeat_max now specifies
1663      the number of additional copies needed. */
1664
1665      if (repeat_max >= 0)
1666        {
1667        for (i = repeat_max - 1; i >= 0; i--)
1668          {
1669          *code++ = OP_BRAZERO + repeat_type;
1670
1671          /* All but the final copy start a new nesting, maintaining the
1672          chain of brackets outstanding. */
1673
1674          if (i != 0)
1675            {
1676            int offset;
1677            *code++ = OP_BRA;
1678            offset = (bralink == NULL)? 0 : code - bralink;
1679            bralink = code;
1680            *code++ = offset >> 8;
1681            *code++ = offset & 255;
1682            }
1683
1684          memcpy(code, previous, len);
1685          code += len;
1686          }
1687
1688        /* Now chain through the pending brackets, and fill in their length
1689        fields (which are holding the chain links pro tem). */
1690
1691        while (bralink != NULL)
1692          {
1693          int oldlinkoffset;
1694          int offset = code - bralink + 1;
1695          uschar *bra = code - offset;
1696          oldlinkoffset = (bra[1] << 8) + bra[2];
1697          bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1698          *code++ = OP_KET;
1699          *code++ = bra[1] = offset >> 8;
1700          *code++ = bra[2] = (offset & 255);
1701          }
1702        }
1703
1704      /* If the maximum is unlimited, set a repeater in the final copy. We
1705      can't just offset backwards from the current code point, because we
1706      don't know if there's been an options resetting after the ket. The
1707      correct offset was computed above. */
1708
1709      else code[-ketoffset] = OP_KETRMAX + repeat_type;
1710      }
1711
1712    /* Else there's some kind of shambles */
1713
1714    else
1715      {
1716      *errorptr = ERR11;
1717      goto FAILED;
1718      }
1719
    /* In all cases we no longer have a previous item. */
1721
1722    END_REPEAT:
1723    previous = NULL;
1724    break;
1725
1726
1727    /* Start of nested bracket sub-expression, or comment or lookahead or
1728    lookbehind or option setting or condition. First deal with special things
1729    that can come after a bracket; all are introduced by ?, and the appearance
1730    of any of them means that this is not a referencing group. They were
1731    checked for validity in the first pass over the string, so we don't have to
1732    check for syntax errors here.  */
1733
1734    case '(':
1735    newoptions = options;
1736    skipbytes = 0;
1737
1738    if (*(++ptr) == '?')
1739      {
1740      int set, unset;
1741      int *optset;
1742
1743      switch (*(++ptr))
1744        {
1745        case '#':                 /* Comment; skip to ket */
1746        ptr++;
1747        while (*ptr != ')') ptr++;
1748        continue;
1749
1750        case ':':                 /* Non-extracting bracket */
1751        bravalue = OP_BRA;
1752        ptr++;
1753        break;
1754
1755        case '(':
1756        bravalue = OP_COND;       /* Conditional group */
1757        if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1758          {
1759          int condref = *ptr - '0';
1760          while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1761          if (condref == 0)
1762            {
1763            *errorptr = ERR35;
1764            goto FAILED;
1765            }
1766          ptr++;
1767          code[3] = OP_CREF;
1768          code[4] = condref >> 8;
1769          code[5] = condref & 255;
1770          skipbytes = 3;
1771          }
1772        else ptr--;
1773        break;
1774
1775        case '=':                 /* Positive lookahead */
1776        bravalue = OP_ASSERT;
1777        ptr++;
1778        break;
1779
1780        case '!':                 /* Negative lookahead */
1781        bravalue = OP_ASSERT_NOT;
1782        ptr++;
1783        break;
1784
1785        case '<':                 /* Lookbehinds */
1786        switch (*(++ptr))
1787          {
1788          case '=':               /* Positive lookbehind */
1789          bravalue = OP_ASSERTBACK;
1790          ptr++;
1791          break;
1792
1793          case '!':               /* Negative lookbehind */
1794          bravalue = OP_ASSERTBACK_NOT;
1795          ptr++;
1796          break;
1797
1798          default:                /* Syntax error */
1799          *errorptr = ERR24;
1800          goto FAILED;
1801          }
1802        break;
1803
1804        case '>':                 /* One-time brackets */
1805        bravalue = OP_ONCE;
1806        ptr++;
1807        break;
1808
1809        case 'R':                 /* Pattern recursion */
1810        *code++ = OP_RECURSE;
1811        ptr++;
1812        continue;
1813
1814        default:                  /* Option setting */
1815        set = unset = 0;
1816        optset = &set;
1817
1818        while (*ptr != ')' && *ptr != ':')
1819          {
1820          switch (*ptr++)
1821            {
1822            case '-': optset = &unset; break;
1823
1824            case 'i': *optset |= PCRE_CASELESS; break;
1825            case 'm': *optset |= PCRE_MULTILINE; break;
1826            case 's': *optset |= PCRE_DOTALL; break;
1827            case 'x': *optset |= PCRE_EXTENDED; break;
1828            case 'U': *optset |= PCRE_UNGREEDY; break;
1829            case 'X': *optset |= PCRE_EXTRA; break;
1830
1831            default:
1832            *errorptr = ERR12;
1833            goto FAILED;
1834            }
1835          }
1836
1837        /* Set up the changed option bits, but don't change anything yet. */
1838
1839        newoptions = (options | set) & (~unset);
1840
1841        /* If the options ended with ')' this is not the start of a nested
1842        group with option changes, so the options change at this level. At top
1843        level there is nothing else to be done (the options will in fact have
1844        been set from the start of compiling as a result of the first pass) but
1845        at an inner level we must compile code to change the ims options if
1846        necessary, and pass the new setting back so that it can be put at the
1847        start of any following branches, and when this group ends, a resetting
1848        item can be compiled. */
1849
1850        if (*ptr == ')')
1851          {
1852          if ((options & PCRE_INGROUP) != 0 &&
1853              (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1854            {
1855            *code++ = OP_OPT;
1856            *code++ = *optchanged = newoptions & PCRE_IMS;
1857            }
1858          options = newoptions;  /* Change options at this level */
1859          previous = NULL;       /* This item can't be repeated */
1860          continue;              /* It is complete */
1861          }
1862
1863        /* If the options ended with ':' we are heading into a nested group
1864        with possible change of options. Such groups are non-capturing and are
1865        not assertions of any kind. All we need to do is skip over the ':';
1866        the newoptions value is handled below. */
1867
1868        bravalue = OP_BRA;
1869        ptr++;
1870        }
1871      }
1872
1873    /* Else we have a referencing group; adjust the opcode. If the bracket
1874    number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and
1875    arrange for the true number to follow later, in an OP_BRANUMBER item. */
1876
1877    else
1878      {
1879      if (++(*brackets) > EXTRACT_BASIC_MAX)
1880        {
1881        bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
1882        code[3] = OP_BRANUMBER;
1883        code[4] = *brackets >> 8;
1884        code[5] = *brackets & 255;
1885        skipbytes = 3;
1886        }
1887      else bravalue = OP_BRA + *brackets;
1888      }
1889
1890    /* Process nested bracketed re. Assertions may not be repeated, but other
1891    kinds can be. We copy code into a non-register variable in order to be able
1892    to pass its address because some compilers complain otherwise. Pass in a
1893    new setting for the ims options if they have changed. */
1894
1895    previous = (bravalue >= OP_ONCE)? code : NULL;
1896    *code = bravalue;
1897    tempcode = code;
1898
1899    if (!compile_regex(
1900         options | PCRE_INGROUP,       /* Set for all nested groups */
1901         ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1902           newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1903         brackets,                     /* Extracting bracket count */
1904         &tempcode,                    /* Where to put code (updated) */
1905         &ptr,                         /* Input pointer (updated) */
1906         errorptr,                     /* Where to put an error message */
1907         (bravalue == OP_ASSERTBACK ||
1908          bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1909         skipbytes,                    /* Skip over OP_COND/OP_BRANUMBER */
1910         &subreqchar,                  /* For possible last char */
1911         &subcountlits,                /* For literal count */
1912         cd))                          /* Tables block */
1913      goto FAILED;
1914
1915    /* At the end of compiling, code is still pointing to the start of the
1916    group, while tempcode has been updated to point past the end of the group
1917    and any option resetting that may follow it. The pattern pointer (ptr)
1918    is on the bracket. */
1919
1920    /* If this is a conditional bracket, check that there are no more than
1921    two branches in the group. */
1922
1923    else if (bravalue == OP_COND)
1924      {
1925      uschar *tc = code;
1926      condcount = 0;
1927
1928      do {
1929         condcount++;
1930         tc += (tc[1] << 8) | tc[2];
1931         }
1932      while (*tc != OP_KET);
1933
1934      if (condcount > 2)
1935        {
1936        *errorptr = ERR27;
1937        goto FAILED;
1938        }
1939      }
1940
1941    /* Handle updating of the required character. If the subpattern didn't
1942    set one, leave it as it was. Otherwise, update it for normal brackets of
1943    all kinds, forward assertions, and conditions with two branches. Don't
1944    update the literal count for forward assertions, however. If the bracket
1945    is followed by a quantifier with zero repeat, we have to back off. Hence
1946    the definition of prevreqchar and subcountlits outside the main loop so
1947    that they can be accessed for the back off. */
1948
1949    if (subreqchar > 0 &&
1950         (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1951         (bravalue == OP_COND && condcount == 2)))
1952      {
1953      prevreqchar = *reqchar;
1954      *reqchar = subreqchar;
1955      if (bravalue != OP_ASSERT) *countlits += subcountlits;
1956      }
1957
1958    /* Now update the main code pointer to the end of the group. */
1959
1960    code = tempcode;
1961
1962    /* Error if hit end of pattern */
1963
1964    if (*ptr != ')')
1965      {
1966      *errorptr = ERR14;
1967      goto FAILED;
1968      }
1969    break;
1970
1971    /* Check \ for being a real metacharacter; if not, fall through and handle
1972    it as a data character at the start of a string. Escape items are checked
1973    for validity in the pre-compiling pass. */
1974
1975    case '\\':
1976    tempptr = ptr;
1977    c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1978
1979    /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1980    are arranged to be the negation of the corresponding OP_values. For the
1981    back references, the values are ESC_REF plus the reference number. Only
1982    back references and those types that consume a character may be repeated.
1983    We can test for values between ESC_b and ESC_Z for the latter; this may
1984    have to change if any new ones are ever created. */
1985
1986    if (c < 0)
1987      {
1988      if (-c >= ESC_REF)
1989        {
1990        int number = -c - ESC_REF;
1991        previous = code;
1992        *code++ = OP_REF;
1993        *code++ = number >> 8;
1994        *code++ = number & 255;
1995        }
1996      else
1997        {
1998        previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1999        *code++ = -c;
2000        }
2001      continue;
2002      }
2003
2004    /* Data character: reset and fall through */
2005
2006    ptr = tempptr;
2007    c = '\\';
2008
2009    /* Handle a run of data characters until a metacharacter is encountered.
2010    The first character is guaranteed not to be whitespace or # when the
2011    extended flag is set. */
2012
2013    NORMAL_CHAR:
2014    default:
2015    previous = code;
2016    *code = OP_CHARS;
2017    code += 2;
2018    length = 0;
2019
2020    do
2021      {
2022      if ((options & PCRE_EXTENDED) != 0)
2023        {
2024        if ((cd->ctypes[c] & ctype_space) != 0) continue;
2025        if (c == '#')
2026          {
2027          /* The space before the ; is to avoid a warning on a silly compiler
2028          on the Macintosh. */
2029          while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
2030          if (c == 0) break;
2031          continue;
2032          }
2033        }
2034
2035      /* Backslash may introduce a data char or a metacharacter. Escaped items
2036      are checked for validity in the pre-compiling pass. Stop the string
2037      before a metaitem. */
2038
2039      if (c == '\\')
2040        {
2041        tempptr = ptr;
2042        c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
2043        if (c < 0) { ptr = tempptr; break; }
2044
2045        /* If a character is > 127 in UTF-8 mode, we have to turn it into
2046        two or more characters in the UTF-8 encoding. */
2047
2048#ifdef SUPPORT_UTF8
2049        if (c > 127 && (options & PCRE_UTF8) != 0)
2050          {
2051          uschar buffer[8];
2052          int len = ord2utf8(c, buffer);
2053          for (c = 0; c < len; c++) *code++ = buffer[c];
2054          length += len;
2055          continue;
2056          }
2057#endif
2058        }
2059
2060      /* Ordinary character or single-char escape */
2061
2062      *code++ = c;
2063      length++;
2064      }
2065
2066    /* This "while" is the end of the "do" above. */
2067
2068    while (length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
2069
2070    /* Update the last character and the count of literals */
2071
2072    prevreqchar = (length > 1)? code[-2] : *reqchar;
2073    *reqchar = code[-1];
2074    *countlits += length;
2075
2076    /* Compute the length and set it in the data vector, and advance to
2077    the next state. */
2078
2079    previous[1] = length;
2080    if (length < MAXLIT) ptr--;
2081    break;
2082    }
2083  }                   /* end of big loop */
2084
2085/* Control never reaches here by falling through, only by a goto for all the
2086error states. Pass back the position in the pattern so that it can be displayed
2087to the user for diagnosing the error. */
2088
2089FAILED:
2090*ptrptr = ptr;
2091return FALSE;
2092}
2093
2094
2095
2096
2097/*************************************************
2098*     Compile sequence of alternatives           *
2099*************************************************/
2100
2101/* On entry, ptr is pointing past the bracket character, but on return
2102it points to the closing bracket, or vertical bar, or end of string.
2103The code variable is pointing at the byte into which the BRA operator has been
2104stored. If the ims options are changed at the start (for a (?ims: group) or
2105during any branch, we need to insert an OP_OPT item at the start of every
2106following branch to ensure they get set correctly at run time, and also pass
2107the new options into every subsequent branch compile.
2108
2109Arguments:
2110  options     the option bits
2111  optchanged  new ims options to set as if (?ims) were at the start, or -1
2112               for no change
2113  brackets    -> int containing the number of extracting brackets used
2114  codeptr     -> the address of the current code pointer
2115  ptrptr      -> the address of the current pattern pointer
2116  errorptr    -> pointer to error message
2117  lookbehind  TRUE if this is a lookbehind assertion
2118  skipbytes   skip this many bytes at start (for OP_COND, OP_BRANUMBER)
2119  reqchar     -> place to put the last required character, or a negative number
2120  countlits   -> place to put the shortest literal count of any branch
2121  cd          points to the data block with tables pointers
2122
2123Returns:      TRUE on success
2124*/
2125
2126static BOOL
2127compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
2128  const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes,
2129  int *reqchar, int *countlits, compile_data *cd)
2130{
2131const uschar *ptr = *ptrptr;
2132uschar *code = *codeptr;
2133uschar *last_branch = code;
2134uschar *start_bracket = code;
2135uschar *reverse_count = NULL;
2136int oldoptions = options & PCRE_IMS;
2137int branchreqchar, branchcountlits;
2138
2139*reqchar = -1;
2140*countlits = INT_MAX;
2141code += 3 + skipbytes;
2142
2143/* Loop for each alternative branch */
2144
2145for (;;)
2146  {
2147  int length;
2148
2149  /* Handle change of options */
2150
2151  if (optchanged >= 0)
2152    {
2153    *code++ = OP_OPT;
2154    *code++ = optchanged;
2155    options = (options & ~PCRE_IMS) | optchanged;
2156    }
2157
2158  /* Set up dummy OP_REVERSE if lookbehind assertion */
2159
2160  if (lookbehind)
2161    {
2162    *code++ = OP_REVERSE;
2163    reverse_count = code;
2164    *code++ = 0;
2165    *code++ = 0;
2166    }
2167
2168  /* Now compile the branch */
2169
2170  if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
2171      &branchreqchar, &branchcountlits, cd))
2172    {
2173    *ptrptr = ptr;
2174    return FALSE;
2175    }
2176
2177  /* Fill in the length of the last branch */
2178
2179  length = code - last_branch;
2180  last_branch[1] = length >> 8;
2181  last_branch[2] = length & 255;
2182
2183  /* Save the last required character if all branches have the same; a current
2184  value of -1 means unset, while -2 means "previous branch had no last required
2185  char".  */
2186
2187  if (*reqchar != -2)
2188    {
2189    if (branchreqchar >= 0)
2190      {
2191      if (*reqchar == -1) *reqchar = branchreqchar;
2192      else if (*reqchar != branchreqchar) *reqchar = -2;
2193      }
2194    else *reqchar = -2;
2195    }
2196
2197  /* Keep the shortest literal count */
2198
2199  if (branchcountlits < *countlits) *countlits = branchcountlits;
2200  DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
2201
2202  /* If lookbehind, check that this branch matches a fixed-length string,
2203  and put the length into the OP_REVERSE item. Temporarily mark the end of
2204  the branch with OP_END. */
2205
2206  if (lookbehind)
2207    {
2208    *code = OP_END;
2209    length = find_fixedlength(last_branch, options);
2210    DPRINTF(("fixed length = %d\n", length));
2211    if (length < 0)
2212      {
2213      *errorptr = ERR25;
2214      *ptrptr = ptr;
2215      return FALSE;
2216      }
2217    reverse_count[0] = (length >> 8);
2218    reverse_count[1] = length & 255;
2219    }
2220
2221  /* Reached end of expression, either ')' or end of pattern. Insert a
2222  terminating ket and the length of the whole bracketed item, and return,
2223  leaving the pointer at the terminating char. If any of the ims options
2224  were changed inside the group, compile a resetting op-code following. */
2225
2226  if (*ptr != '|')
2227    {
2228    length = code - start_bracket;
2229    *code++ = OP_KET;
2230    *code++ = length >> 8;
2231    *code++ = length & 255;
2232    if (optchanged >= 0)
2233      {
2234      *code++ = OP_OPT;
2235      *code++ = oldoptions;
2236      }
2237    *codeptr = code;
2238    *ptrptr = ptr;
2239    return TRUE;
2240    }
2241
2242  /* Another branch follows; insert an "or" node and advance the pointer. */
2243
2244  *code = OP_ALT;
2245  last_branch = code;
2246  code += 3;
2247  ptr++;
2248  }
2249/* Control never reaches here */
2250}
2251
2252
2253
2254
2255/*************************************************
2256*      Find first significant op code            *
2257*************************************************/
2258
2259/* This is called by several functions that scan a compiled expression looking
2260for a fixed first character, or an anchoring op code etc. It skips over things
2261that do not influence this. For one application, a change of caseless option is
2262important.
2263
2264Arguments:
2265  code       pointer to the start of the group
2266  options    pointer to external options
2267  optbit     the option bit whose changing is significant, or
2268             zero if none are
2269  optstop    TRUE to return on option change, otherwise change the options
2270               value and continue
2271
2272Returns:     pointer to the first significant opcode
2273*/
2274
2275static const uschar*
2276first_significant_code(const uschar *code, int *options, int optbit,
2277  BOOL optstop)
2278{
2279for (;;)
2280  {
2281  switch ((int)*code)
2282    {
2283    case OP_OPT:
2284    if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
2285      {
2286      if (optstop) return code;
2287      *options = (int)code[1];
2288      }
2289    code += 2;
2290    break;
2291
2292    case OP_CREF:
2293    case OP_BRANUMBER:
2294    code += 3;
2295    break;
2296
2297    case OP_WORD_BOUNDARY:
2298    case OP_NOT_WORD_BOUNDARY:
2299    code++;
2300    break;
2301
2302    case OP_ASSERT_NOT:
2303    case OP_ASSERTBACK:
2304    case OP_ASSERTBACK_NOT:
2305    do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
2306    code += 3;
2307    break;
2308
2309    default:
2310    return code;
2311    }
2312  }
2313/* Control never reaches here */
2314}
2315
2316
2317
2318
2319/*************************************************
2320*          Check for anchored expression         *
2321*************************************************/
2322
2323/* Try to find out if this is an anchored regular expression. Consider each
2324alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
2325all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
2326it's anchored. However, if this is a multiline pattern, then only OP_SOD
2327counts, since OP_CIRC can match in the middle.
2328
2329A branch is also implicitly anchored if it starts with .* and DOTALL is set,
2330because that will try the rest of the pattern at all possible matching points,
2331so there is no point trying them again.
2332
2333Arguments:
2334  code       points to start of expression (the bracket)
2335  options    points to the options setting
2336
2337Returns:     TRUE or FALSE
2338*/
2339
2340static BOOL
2341is_anchored(register const uschar *code, int *options)
2342{
2343do {
2344   const uschar *scode = first_significant_code(code + 3, options,
2345     PCRE_MULTILINE, FALSE);
2346   register int op = *scode;
2347   if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2348     { if (!is_anchored(scode, options)) return FALSE; }
2349   else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
2350            (*options & PCRE_DOTALL) != 0)
2351     { if (scode[1] != OP_ANY) return FALSE; }
2352   else if (op != OP_SOD &&
2353           ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
2354     return FALSE;
2355   code += (code[1] << 8) + code[2];
2356   }
2357while (*code == OP_ALT);
2358return TRUE;
2359}
2360
2361
2362
2363/*************************************************
2364*         Check for starting with ^ or .*        *
2365*************************************************/
2366
2367/* This is called to find out if every branch starts with ^ or .* so that
2368"first char" processing can be done to speed things up in multiline
2369matching and for non-DOTALL patterns that start with .* (which must start at
2370the beginning or after \n).
2371
2372Argument:  points to start of expression (the bracket)
2373Returns:   TRUE or FALSE
2374*/
2375
2376static BOOL
2377is_startline(const uschar *code)
2378{
2379do {
2380   const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
2381   register int op = *scode;
2382   if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
2383     { if (!is_startline(scode)) return FALSE; }
2384   else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
2385     { if (scode[1] != OP_ANY) return FALSE; }
2386   else if (op != OP_CIRC) return FALSE;
2387   code += (code[1] << 8) + code[2];
2388   }
2389while (*code == OP_ALT);
2390return TRUE;
2391}
2392
2393
2394
2395/*************************************************
2396*          Check for fixed first char            *
2397*************************************************/
2398
2399/* Try to find out if there is a fixed first character. This is called for
2400unanchored expressions, as it speeds up their processing quite considerably.
2401Consider each alternative branch. If they all start with the same char, or with
2402a bracket all of whose alternatives start with the same char (recurse ad lib),
2403then we return that char, otherwise -1.
2404
2405Arguments:
2406  code       points to start of expression (the bracket)
2407  options    pointer to the options (used to check casing changes)
2408
2409Returns:     -1 or the fixed first char
2410*/
2411
2412static int
2413find_firstchar(const uschar *code, int *options)
2414{
2415register int c = -1;
2416do {
2417   int d;
2418   const uschar *scode = first_significant_code(code + 3, options,
2419     PCRE_CASELESS, TRUE);
2420   register int op = *scode;
2421
2422   if (op >= OP_BRA) op = OP_BRA;
2423
2424   switch(op)
2425     {
2426     default:
2427     return -1;
2428
2429     case OP_BRA:
2430     case OP_ASSERT:
2431     case OP_ONCE:
2432     case OP_COND:
2433     if ((d = find_firstchar(scode, options)) < 0) return -1;
2434     if (c < 0) c = d; else if (c != d) return -1;
2435     break;
2436
2437     case OP_EXACT:       /* Fall through */
2438     scode++;
2439
2440     case OP_CHARS:       /* Fall through */
2441     scode++;
2442
2443     case OP_PLUS:
2444     case OP_MINPLUS:
2445     if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
2446     break;
2447     }
2448
2449   code += (code[1] << 8) + code[2];
2450   }
2451while (*code == OP_ALT);
2452return c;
2453}
2454
2455
2456
2457
2458
2459/*************************************************
2460*        Compile a Regular Expression            *
2461*************************************************/
2462
2463/* This function takes a string and returns a pointer to a block of store
2464holding a compiled version of the expression.
2465
2466Arguments:
2467  pattern      the regular expression
2468  options      various option bits
2469  errorptr     pointer to pointer to error text
2470  erroroffset  ptr offset in pattern where error was detected
2471  tables       pointer to character tables or NULL
2472
2473Returns:       pointer to compiled data block, or NULL on error,
2474               with errorptr and erroroffset set
2475*/
2476
2477pcre *
2478pcre_compile(const char *pattern, int options, const char **errorptr,
2479  int *erroroffset, const unsigned char *tables)
2480{
2481real_pcre *re;
2482int length = 3;      /* For initial BRA plus length */
2483int runlength;
2484int c, reqchar, countlits;
2485int bracount = 0;
2486int top_backref = 0;
2487int branch_extra = 0;
2488int branch_newextra;
2489unsigned int brastackptr = 0;
2490size_t size;
2491uschar *code;
2492const uschar *ptr;
2493compile_data compile_block;
2494int brastack[BRASTACK_SIZE];
2495uschar bralenstack[BRASTACK_SIZE];
2496
2497#ifdef DEBUG
2498uschar *code_base, *code_end;
2499#endif
2500
2501/* Can't support UTF8 unless PCRE has been compiled to include the code. */
2502
2503#ifndef SUPPORT_UTF8
2504if ((options & PCRE_UTF8) != 0)
2505  {
2506  *errorptr = ERR32;
2507  return NULL;
2508  }
2509#endif
2510
2511/* We can't pass back an error message if errorptr is NULL; I guess the best we
2512can do is just return NULL. */
2513
2514if (errorptr == NULL) return NULL;
2515*errorptr = NULL;
2516
2517/* However, we can give a message for this error */
2518
2519if (erroroffset == NULL)
2520  {
2521  *errorptr = ERR16;
2522  return NULL;
2523  }
2524*erroroffset = 0;
2525
2526if ((options & ~PUBLIC_OPTIONS) != 0)
2527  {
2528  *errorptr = ERR17;
2529  return NULL;
2530  }
2531
2532/* Set up pointers to the individual character tables */
2533
2534if (tables == NULL) tables = pcre_default_tables;
2535compile_block.lcc = tables + lcc_offset;
2536compile_block.fcc = tables + fcc_offset;
2537compile_block.cbits = tables + cbits_offset;
2538compile_block.ctypes = tables + ctypes_offset;
2539
2540/* Reflect pattern for debugging output */
2541
2542DPRINTF(("------------------------------------------------------------------\n"));
2543DPRINTF(("%s\n", pattern));
2544
2545/* The first thing to do is to make a pass over the pattern to compute the
2546amount of store required to hold the compiled code. This does not have to be
2547perfect as long as errors are overestimates. At the same time we can detect any
2548internal flag settings. Make an attempt to correct for any counted white space
2549if an "extended" flag setting appears late in the pattern. We can't be so
2550clever for #-comments. */
2551
2552ptr = (const uschar *)(pattern - 1);
2553while ((c = *(++ptr)) != 0)
2554  {
2555  int min, max;
2556  int class_charcount;
2557  int bracket_length;
2558
2559  if ((options & PCRE_EXTENDED) != 0)
2560    {
2561    if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2562    if (c == '#')
2563      {
2564      /* The space before the ; is to avoid a warning on a silly compiler
2565      on the Macintosh. */
2566      while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
2567      continue;
2568      }
2569    }
2570
2571  switch(c)
2572    {
2573    /* A backslashed item may be an escaped "normal" character or a
2574    character type. For a "normal" character, put the pointers and
2575    character back so that tests for whitespace etc. in the input
2576    are done correctly. */
2577
2578    case '\\':
2579      {
2580      const uschar *save_ptr = ptr;
2581      c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2582      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2583      if (c >= 0)
2584        {
2585        ptr = save_ptr;
2586        c = '\\';
2587        goto NORMAL_CHAR;
2588        }
2589      }
2590    length++;
2591
2592    /* A back reference needs an additional 2 bytes, plus either one or 5
2593    bytes for a repeat. We also need to keep the value of the highest
2594    back reference. */
2595
2596    if (c <= -ESC_REF)
2597      {
2598      int refnum = -c - ESC_REF;
2599      if (refnum > top_backref) top_backref = refnum;
2600      length += 2;   /* For single back reference */
2601      if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2602        {
2603        ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2604        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2605        if ((min == 0 && (max == 1 || max == -1)) ||
2606          (min == 1 && max == -1))
2607            length++;
2608        else length += 5;
2609        if (ptr[1] == '?') ptr++;
2610        }
2611      }
2612    continue;
2613
2614    case '^':
2615    case '.':
2616    case '$':
2617    case '*':     /* These repeats won't be after brackets; */
2618    case '+':     /* those are handled separately */
2619    case '?':
2620    length++;
2621    continue;
2622
2623    /* This covers the cases of repeats after a single char, metachar, class,
2624    or back reference. */
2625
2626    case '{':
2627    if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2628    ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2629    if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2630    if ((min == 0 && (max == 1 || max == -1)) ||
2631      (min == 1 && max == -1))
2632        length++;
2633    else
2634      {
2635      length--;   /* Uncount the original char or metachar */
2636      if (min == 1) length++; else if (min > 0) length += 4;
2637      if (max > 0) length += 4; else length += 2;
2638      }
2639    if (ptr[1] == '?') ptr++;
2640    continue;
2641
2642    /* An alternation contains an offset to the next branch or ket. If any ims
2643    options changed in the previous branch(es), and/or if we are in a
2644    lookbehind assertion, extra space will be needed at the start of the
2645    branch. This is handled by branch_extra. */
2646
2647    case '|':
2648    length += 3 + branch_extra;
2649    continue;
2650
2651    /* A character class uses 33 characters. Don't worry about character types
2652    that aren't allowed in classes - they'll get picked up during the compile.
2653    A character class that contains only one character uses 2 or 3 bytes,
2654    depending on whether it is negated or not. Notice this where we can. */
2655
2656    case '[':
2657    class_charcount = 0;
2658    if (*(++ptr) == '^') ptr++;
2659    do
2660      {
2661      if (*ptr == '\\')
2662        {
2663        int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2664          &compile_block);
2665        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2666        if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2667        }
2668      else class_charcount++;
2669      ptr++;
2670      }
2671    while (*ptr != 0 && *ptr != ']');
2672
2673    /* Repeats for negated single chars are handled by the general code */
2674
2675    if (class_charcount == 1) length += 3; else
2676      {
2677      length += 33;
2678
2679      /* A repeat needs either 1 or 5 bytes. */
2680
2681      if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2682        {
2683        ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2684        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2685        if ((min == 0 && (max == 1 || max == -1)) ||
2686          (min == 1 && max == -1))
2687            length++;
2688        else length += 5;
2689        if (ptr[1] == '?') ptr++;
2690        }
2691      }
2692    continue;
2693
2694    /* Brackets may be genuine groups or special things */
2695
2696    case '(':
2697    branch_newextra = 0;
2698    bracket_length = 3;
2699
2700    /* Handle special forms of bracket, which all start (? */
2701
2702    if (ptr[1] == '?')
2703      {
2704      int set, unset;
2705      int *optset;
2706
2707      switch (c = ptr[2])
2708        {
2709        /* Skip over comments entirely */
2710        case '#':
2711        ptr += 3;
2712        while (*ptr != 0 && *ptr != ')') ptr++;
2713        if (*ptr == 0)
2714          {
2715          *errorptr = ERR18;
2716          goto PCRE_ERROR_RETURN;
2717          }
2718        continue;
2719
2720        /* Non-referencing groups and lookaheads just move the pointer on, and
2721        then behave like a non-special bracket, except that they don't increment
2722        the count of extracting brackets. Ditto for the "once only" bracket,
2723        which is in Perl from version 5.005. */
2724
2725        case ':':
2726        case '=':
2727        case '!':
2728        case '>':
2729        ptr += 2;
2730        break;
2731
2732        /* A recursive call to the regex is an extension, to provide the
2733        facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */
2734
2735        case 'R':
2736        if (ptr[3] != ')')
2737          {
2738          *errorptr = ERR29;
2739          goto PCRE_ERROR_RETURN;
2740          }
2741        ptr += 3;
2742        length += 1;
2743        break;
2744
2745        /* Lookbehinds are in Perl from version 5.005 */
2746
2747        case '<':
2748        if (ptr[3] == '=' || ptr[3] == '!')
2749          {
2750          ptr += 3;
2751          branch_newextra = 3;
2752          length += 3;         /* For the first branch */
2753          break;
2754          }
2755        *errorptr = ERR24;
2756        goto PCRE_ERROR_RETURN;
2757
2758        /* Conditionals are in Perl from version 5.005. The bracket must either
2759        be followed by a number (for bracket reference) or by an assertion
2760        group. */
2761
2762        case '(':
2763        if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2764          {
2765          ptr += 4;
2766          length += 3;
2767          while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2768          if (*ptr != ')')
2769            {
2770            *errorptr = ERR26;
2771            goto PCRE_ERROR_RETURN;
2772            }
2773          }
2774        else   /* An assertion must follow */
2775          {
2776          ptr++;   /* Can treat like ':' as far as spacing is concerned */
2777          if (ptr[2] != '?' ||
2778             (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )
2779            {
2780            ptr += 2;    /* To get right offset in message */
2781            *errorptr = ERR28;
2782            goto PCRE_ERROR_RETURN;
2783            }
2784          }
2785        break;
2786
2787        /* Else loop checking valid options until ) is met. Anything else is an
2788        error. If we are without any brackets, i.e. at top level, the settings
2789        act as if specified in the options, so massage the options immediately.
2790        This is for backward compatibility with Perl 5.004. */
2791
2792        default:
2793        set = unset = 0;
2794        optset = &set;
2795        ptr += 2;
2796
2797        for (;; ptr++)
2798          {
2799          c = *ptr;
2800          switch (c)
2801            {
2802            case 'i':
2803            *optset |= PCRE_CASELESS;
2804            continue;
2805
2806            case 'm':
2807            *optset |= PCRE_MULTILINE;
2808            continue;
2809
2810            case 's':
2811            *optset |= PCRE_DOTALL;
2812            continue;
2813
2814            case 'x':
2815            *optset |= PCRE_EXTENDED;
2816            continue;
2817
2818            case 'X':
2819            *optset |= PCRE_EXTRA;
2820            continue;
2821
2822            case 'U':
2823            *optset |= PCRE_UNGREEDY;
2824            continue;
2825
2826            case '-':
2827            optset = &unset;
2828            continue;
2829
2830            /* A termination by ')' indicates an options-setting-only item;
2831            this is global at top level; otherwise nothing is done here and
2832            it is handled during the compiling process on a per-bracket-group
2833            basis. */
2834
2835            case ')':
2836            if (brastackptr == 0)
2837              {
2838              options = (options | set) & (~unset);
2839              set = unset = 0;     /* To save length */
2840              }
2841            /* Fall through */
2842
2843            /* A termination by ':' indicates the start of a nested group with
2844            the given options set. This is again handled at compile time, but
2845            we must allow for compiled space if any of the ims options are
2846            set. We also have to allow for resetting space at the end of
2847            the group, which is why 4 is added to the length and not just 2.
2848            If there are several changes of options within the same group, this
2849            will lead to an over-estimate on the length, but this shouldn't
2850            matter very much. We also have to allow for resetting options at
2851            the start of any alternations, which we do by setting
2852            branch_newextra to 2. Finally, we record whether the case-dependent
2853            flag ever changes within the regex. This is used by the "required
2854            character" code. */
2855
2856            case ':':
2857            if (((set|unset) & PCRE_IMS) != 0)
2858              {
2859              length += 4;
2860              branch_newextra = 2;
2861              if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2862              }
2863            goto END_OPTIONS;
2864
2865            /* Unrecognized option character */
2866
2867            default:
2868            *errorptr = ERR12;
2869            goto PCRE_ERROR_RETURN;
2870            }
2871          }
2872
2873        /* If we hit a closing bracket, that's it - this is a freestanding
2874        option-setting. We need to ensure that branch_extra is updated if
2875        necessary. The only values branch_newextra can have here are 0 or 2.
2876        If the value is 2, then branch_extra must either be 2 or 5, depending
2877        on whether this is a lookbehind group or not. */
2878
2879        END_OPTIONS:
2880        if (c == ')')
2881          {
2882          if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2883            branch_extra += branch_newextra;
2884          continue;
2885          }
2886
2887        /* If options were terminated by ':' control comes here. Fall through
2888        to handle the group below. */
2889        }
2890      }
2891
2892    /* Extracting brackets must be counted so we can process escapes in a
2893    Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to
2894    need an additional 3 bytes of store per extracting bracket. */
2895
2896    else
2897      {
2898      bracount++;
2899      if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
2900      }
2901
2902    /* Save length for computing whole length at end if there's a repeat that
2903    requires duplication of the group. Also save the current value of
2904    branch_extra, and start the new group with the new value. If non-zero, this
2905    will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */
2906
2907    if (brastackptr >= sizeof(brastack)/sizeof(int))
2908      {
2909      *errorptr = ERR19;
2910      goto PCRE_ERROR_RETURN;
2911      }
2912
2913    bralenstack[brastackptr] = branch_extra;
2914    branch_extra = branch_newextra;
2915
2916    brastack[brastackptr++] = length;
2917    length += bracket_length;
2918    continue;
2919
2920    /* Handle ket. Look for subsequent max/min; for certain sets of values we
2921    have to replicate this bracket up to that many times. If brastackptr is
2922    0 this is an unmatched bracket which will generate an error, but take care
2923    not to try to access brastack[-1] when computing the length and restoring
2924    the branch_extra value. */
2925
2926    case ')':
2927    length += 3;
2928      {
2929      int minval = 1;
2930      int maxval = 1;
2931      int duplength;
2932
2933      if (brastackptr > 0)
2934        {
2935        duplength = length - brastack[--brastackptr];
2936        branch_extra = bralenstack[brastackptr];
2937        }
2938      else duplength = 0;
2939
2940      /* Leave ptr at the final char; for read_repeat_counts this happens
2941      automatically; for the others we need an increment. */
2942
2943      if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2944        {
2945        ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2946          &compile_block);
2947        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2948        }
2949      else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2950      else if (c == '+') { maxval = -1; ptr++; }
2951      else if (c == '?') { minval = 0; ptr++; }
2952
2953      /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2954      group, and if the maximum is greater than zero, we have to replicate
2955      maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2956      bracket set - hence the 7. */
2957
2958      if (minval == 0)
2959        {
2960        length++;
2961        if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2962        }
2963
2964      /* When the minimum is greater than zero, we have to replicate up to
2965      minval-1 times, with no additions required in the copies. Then, if
2966      there is a limited maximum we have to replicate up to maxval-1 times
2967      allowing for a BRAZERO item before each optional copy and nesting
2968      brackets for all but one of the optional copies. */
2969
2970      else
2971        {
2972        length += (minval - 1) * duplength;
2973        if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2974          length += (maxval - minval) * (duplength + 7) - 6;
2975        }
2976      }
2977    continue;
2978
2979    /* Non-special character. For a run of such characters the length required
2980    is the number of characters + 2, except that the maximum run length is 255.
2981    We won't get a skipped space or a non-data escape or the start of a #
2982    comment as the first character, so the length can't be zero. */
2983
2984    NORMAL_CHAR:
2985    default:
2986    length += 2;
2987    runlength = 0;
2988    do
2989      {
2990      if ((options & PCRE_EXTENDED) != 0)
2991        {
2992        if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2993        if (c == '#')
2994          {
2995          /* The space before the ; is to avoid a warning on a silly compiler
2996          on the Macintosh. */
2997          while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
2998          continue;
2999          }
3000        }
3001
3002      /* Backslash may introduce a data char or a metacharacter; stop the
3003      string before the latter. */
3004
3005      if (c == '\\')
3006        {
3007        const uschar *saveptr = ptr;
3008        c = check_escape(&ptr, errorptr, bracount, options, FALSE,
3009          &compile_block);
3010        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
3011        if (c < 0) { ptr = saveptr; break; }
3012
3013#ifdef SUPPORT_UTF8
3014        if (c > 127 && (options & PCRE_UTF8) != 0)
3015          {
3016          int i;
3017          for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
3018            if (c <= utf8_table1[i]) break;
3019          runlength += i;
3020          }
3021#endif
3022        }
3023
3024      /* Ordinary character or single-char escape */
3025
3026      runlength++;
3027      }
3028
3029    /* This "while" is the end of the "do" above. */
3030
3031    while (runlength < MAXLIT &&
3032      (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
3033
3034    ptr--;
3035    length += runlength;
3036    continue;
3037    }
3038  }
3039
3040length += 4;    /* For final KET and END */
3041
3042if (length > 65539)
3043  {
3044  *errorptr = ERR20;
3045  return NULL;
3046  }
3047
3048/* Compute the size of data block needed and get it, either from malloc or
3049externally provided function. We specify "code[0]" in the offsetof() expression
3050rather than just "code", because it has been reported that one broken compiler
3051fails on "code" because it is also an independent variable. It should make no
3052difference to the value of the offsetof(). */
3053
3054size = length + offsetof(real_pcre, code[0]);
3055re = (real_pcre *)(pcre_malloc)(size);
3056
3057if (re == NULL)
3058  {
3059  *errorptr = ERR21;
3060  return NULL;
3061  }
3062
3063/* Put in the magic number, and save the size, options, and table pointer */
3064
3065re->magic_number = MAGIC_NUMBER;
3066re->size = size;
3067re->options = options;
3068re->tables = tables;
3069
3070/* Set up a starting, non-extracting bracket, then compile the expression. On
3071error, *errorptr will be set non-NULL, so we don't need to look at the result
3072of the function here. */
3073
3074ptr = (const uschar *)pattern;
3075code = re->code;
3076*code = OP_BRA;
3077bracount = 0;
3078(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, 0,
3079  &reqchar, &countlits, &compile_block);
3080re->top_bracket = bracount;
3081re->top_backref = top_backref;
3082
3083/* If not reached end of pattern on success, there's an excess bracket. */
3084
3085if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
3086
3087/* Fill in the terminating state and check for disastrous overflow, but
3088if debugging, leave the test till after things are printed out. */
3089
3090*code++ = OP_END;
3091
3092#ifndef DEBUG
3093if (code - re->code > length) *errorptr = ERR23;
3094#endif
3095
3096/* Give an error if there's back reference to a non-existent capturing
3097subpattern. */
3098
3099if (top_backref > re->top_bracket) *errorptr = ERR15;
3100
3101/* Failed to compile */
3102
3103if (*errorptr != NULL)
3104  {
3105  (pcre_free)(re);
3106  PCRE_ERROR_RETURN:
3107  *erroroffset = ptr - (const uschar *)pattern;
3108  return NULL;
3109  }
3110
3111/* If the anchored option was not passed, set flag if we can determine that the
3112pattern is anchored by virtue of ^ characters or \A or anything else (such as
3113starting with .* when DOTALL is set).
3114
3115Otherwise, see if we can determine what the first character has to be, because
3116that speeds up unanchored matches no end. If not, see if we can set the
3117PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
3118start with ^. and also when all branches start with .* for non-DOTALL matches.
3119*/
3120
3121if ((options & PCRE_ANCHORED) == 0)
3122  {
3123  int temp_options = options;
3124  if (is_anchored(re->code, &temp_options))
3125    re->options |= PCRE_ANCHORED;
3126  else
3127    {
3128    int ch = find_firstchar(re->code, &temp_options);
3129    if (ch >= 0)
3130      {
3131      re->first_char = ch;
3132      re->options |= PCRE_FIRSTSET;
3133      }
3134    else if (is_startline(re->code))
3135      re->options |= PCRE_STARTLINE;
3136    }
3137  }
3138
3139/* Save the last required character if there are at least two literal
3140characters on all paths, or if there is no first character setting. */
3141
3142if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
3143  {
3144  re->req_char = reqchar;
3145  re->options |= PCRE_REQCHSET;
3146  }
3147
3148/* Print out the compiled data for debugging */
3149
3150#ifdef DEBUG
3151
3152printf("Length = %d top_bracket = %d top_backref = %d\n",
3153  length, re->top_bracket, re->top_backref);
3154
3155if (re->options != 0)
3156  {
3157  printf("%s%s%s%s%s%s%s%s%s\n",
3158    ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
3159    ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
3160    ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
3161    ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
3162    ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
3163    ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
3164    ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
3165    ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
3166    ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
3167  }
3168
3169if ((re->options & PCRE_FIRSTSET) != 0)
3170  {
3171  if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
3172    else printf("First char = \\x%02x\n", re->first_char);
3173  }
3174
3175if ((re->options & PCRE_REQCHSET) != 0)
3176  {
3177  if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
3178    else printf("Req char = \\x%02x\n", re->req_char);
3179  }
3180
3181code_end = code;
3182code_base = code = re->code;
3183
3184while (code < code_end)
3185  {
3186  int charlength;
3187
3188  printf("%3d ", code - code_base);
3189
3190  if (*code >= OP_BRA)
3191    {
3192    if (*code - OP_BRA > EXTRACT_BASIC_MAX)
3193      printf("%3d Bra extra", (code[1] << 8) + code[2]);
3194    else
3195      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
3196    code += 2;
3197    }
3198
3199  else switch(*code)
3200    {
3201    case OP_OPT:
3202    printf(" %.2x %s", code[1], OP_names[*code]);
3203    code++;
3204    break;
3205
3206    case OP_CHARS:
3207    charlength = *(++code);
3208    printf("%3d ", charlength);
3209    while (charlength-- > 0)
3210      if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
3211    break;
3212
3213    case OP_KETRMAX:
3214    case OP_KETRMIN:
3215    case OP_ALT:
3216    case OP_KET:
3217    case OP_ASSERT:
3218    case OP_ASSERT_NOT:
3219    case OP_ASSERTBACK:
3220    case OP_ASSERTBACK_NOT:
3221    case OP_ONCE:
3222    case OP_REVERSE:
3223    case OP_BRANUMBER:
3224    case OP_COND:
3225    case OP_CREF:
3226    printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
3227    code += 2;
3228    break;
3229
3230    case OP_STAR:
3231    case OP_MINSTAR:
3232    case OP_PLUS:
3233    case OP_MINPLUS:
3234    case OP_QUERY:
3235    case OP_MINQUERY:
3236    case OP_TYPESTAR:
3237    case OP_TYPEMINSTAR:
3238    case OP_TYPEPLUS:
3239    case OP_TYPEMINPLUS:
3240    case OP_TYPEQUERY:
3241    case OP_TYPEMINQUERY:
3242    if (*code >= OP_TYPESTAR)
3243      printf("    %s", OP_names[code[1]]);
3244    else if (isprint(c = code[1])) printf("    %c", c);
3245      else printf("    \\x%02x", c);
3246    printf("%s", OP_names[*code++]);
3247    break;
3248
3249    case OP_EXACT:
3250    case OP_UPTO:
3251    case OP_MINUPTO:
3252    if (isprint(c = code[3])) printf("    %c{", c);
3253      else printf("    \\x%02x{", c);
3254    if (*code != OP_EXACT) printf("0,");
3255    printf("%d}", (code[1] << 8) + code[2]);
3256    if (*code == OP_MINUPTO) printf("?");
3257    code += 3;
3258    break;
3259
3260    case OP_TYPEEXACT:
3261    case OP_TYPEUPTO:
3262    case OP_TYPEMINUPTO:
3263    printf("    %s{", OP_names[code[3]]);
3264    if (*code != OP_TYPEEXACT) printf(",");
3265    printf("%d}", (code[1] << 8) + code[2]);
3266    if (*code == OP_TYPEMINUPTO) printf("?");
3267    code += 3;
3268    break;
3269
3270    case OP_NOT:
3271    if (isprint(c = *(++code))) printf("    [^%c]", c);
3272      else printf("    [^\\x%02x]", c);
3273    break;
3274
3275    case OP_NOTSTAR:
3276    case OP_NOTMINSTAR:
3277    case OP_NOTPLUS:
3278    case OP_NOTMINPLUS:
3279    case OP_NOTQUERY:
3280    case OP_NOTMINQUERY:
3281    if (isprint(c = code[1])) printf("    [^%c]", c);
3282      else printf("    [^\\x%02x]", c);
3283    printf("%s", OP_names[*code++]);
3284    break;
3285
3286    case OP_NOTEXACT:
3287    case OP_NOTUPTO:
3288    case OP_NOTMINUPTO:
3289    if (isprint(c = code[3])) printf("    [^%c]{", c);
3290      else printf("    [^\\x%02x]{", c);
3291    if (*code != OP_NOTEXACT) printf(",");
3292    printf("%d}", (code[1] << 8) + code[2]);
3293    if (*code == OP_NOTMINUPTO) printf("?");
3294    code += 3;
3295    break;
3296
3297    case OP_REF:
3298    printf("    \\%d", (code[1] << 8) | code[2]);
3299    code += 3;
3300    goto CLASS_REF_REPEAT;
3301
3302    case OP_CLASS:
3303      {
3304      int i, min, max;
3305      code++;
3306      printf("    [");
3307
3308      for (i = 0; i < 256; i++)
3309        {
3310        if ((code[i/8] & (1 << (i&7))) != 0)
3311          {
3312          int j;
3313          for (j = i+1; j < 256; j++)
3314            if ((code[j/8] & (1 << (j&7))) == 0) break;
3315          if (i == '-' || i == ']') printf("\\");
3316          if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
3317          if (--j > i)
3318            {
3319            printf("-");
3320            if (j == '-' || j == ']') printf("\\");
3321            if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
3322            }
3323          i = j;
3324          }
3325        }
3326      printf("]");
3327      code += 32;
3328
3329      CLASS_REF_REPEAT:
3330
3331      switch(*code)
3332        {
3333        case OP_CRSTAR:
3334        case OP_CRMINSTAR:
3335        case OP_CRPLUS:
3336        case OP_CRMINPLUS:
3337        case OP_CRQUERY:
3338        case OP_CRMINQUERY:
3339        printf("%s", OP_names[*code]);
3340        break;
3341
3342        case OP_CRRANGE:
3343        case OP_CRMINRANGE:
3344        min = (code[1] << 8) + code[2];
3345        max = (code[3] << 8) + code[4];
3346        if (max == 0) printf("{%d,}", min);
3347        else printf("{%d,%d}", min, max);
3348        if (*code == OP_CRMINRANGE) printf("?");
3349        code += 4;
3350        break;
3351
3352        default:
3353        code--;
3354        }
3355      }
3356    break;
3357
3358    /* Anything else is just a one-node item */
3359
3360    default:
3361    printf("    %s", OP_names[*code]);
3362    break;
3363    }
3364
3365  code++;
3366  printf("\n");
3367  }
3368printf("------------------------------------------------------------------\n");
3369
3370/* This check is done here in the debugging case so that the code that
3371was compiled can be seen. */
3372
3373if (code - re->code > length)
3374  {
3375  *errorptr = ERR23;
3376  (pcre_free)(re);
3377  *erroroffset = ptr - (uschar *)pattern;
3378  return NULL;
3379  }
3380#endif
3381
3382return (pcre *)re;
3383}
3384
3385
3386
3387/*************************************************
3388*          Match a back-reference                *
3389*************************************************/
3390
3391/* If a back reference hasn't been set, the length that is passed is greater
3392than the number of characters left in the string, so the match fails.
3393
3394Arguments:
3395  offset      index into the offset vector
3396  eptr        points into the subject
3397  length      length to be matched
3398  md          points to match data block
3399  ims         the ims flags
3400
3401Returns:      TRUE if matched
3402*/
3403
3404static BOOL
3405match_ref(int offset, register const uschar *eptr, int length, match_data *md,
3406  unsigned long int ims)
3407{
3408const uschar *p = md->start_subject + md->offset_vector[offset];
3409
3410#ifdef DEBUG
3411if (eptr >= md->end_subject)
3412  printf("matching subject <null>");
3413else
3414  {
3415  printf("matching subject ");
3416  pchars(eptr, length, TRUE, md);
3417  }
3418printf(" against backref ");
3419pchars(p, length, FALSE, md);
3420printf("\n");
3421#endif
3422
3423/* Always fail if not enough characters left */
3424
3425if (length > md->end_subject - eptr) return FALSE;
3426
3427/* Separate the caselesss case for speed */
3428
3429if ((ims & PCRE_CASELESS) != 0)
3430  {
3431  while (length-- > 0)
3432    if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
3433  }
3434else
3435  { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
3436
3437return TRUE;
3438}
3439
3440
3441
3442/*************************************************
3443*         Match from current position            *
3444*************************************************/
3445
3446/* On entry ecode points to the first opcode, and eptr to the first character
3447in the subject string, while eptrb holds the value of eptr at the start of the
3448last bracketed group - used for breaking infinite loops matching zero-length
3449strings.
3450
3451Arguments:
3452   eptr        pointer in subject
3453   ecode       position in code
3454   offset_top  current top pointer
3455   md          pointer to "static" info for the match
3456   ims         current /i, /m, and /s options
3457   eptrb       pointer to chain of blocks containing eptr at start of
3458                 brackets - for testing for empty matches
3459   flags       can contain
3460                 match_condassert - this is an assertion condition
3461                 match_isgroup - this is the start of a bracketed group
3462
3463Returns:       TRUE if matched
3464*/
3465
3466static BOOL
3467match(register const uschar *eptr, register const uschar *ecode,
3468  int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
3469  int flags)
3470{
3471unsigned long int original_ims = ims;   /* Save for resetting on ')' */
3472eptrblock newptrb;
3473
3474/* At the start of a bracketed group, add the current subject pointer to the
3475stack of such pointers, to be re-instated at the end of the group when we hit
3476the closing ket. When match() is called in other circumstances, we don't add to
3477the stack. */
3478
3479if ((flags & match_isgroup) != 0)
3480  {
3481  newptrb.prev = eptrb;
3482  newptrb.saved_eptr = eptr;
3483  eptrb = &newptrb;
3484  }
3485
3486/* Now start processing the operations. */
3487
3488for (;;)
3489  {
3490  int op = (int)*ecode;
3491  int min, max, ctype;
3492  register int i;
3493  register int c;
3494  BOOL minimize = FALSE;
3495
3496  /* Opening capturing bracket. If there is space in the offset vector, save
3497  the current subject position in the working slot at the top of the vector. We
3498  mustn't change the current values of the data slot, because they may be set
3499  from a previous iteration of this group, and be referred to by a reference
3500  inside the group.
3501
3502  If the bracket fails to match, we need to restore this value and also the
3503  values of the final offsets, in case they were set by a previous iteration of
3504  the same bracket.
3505
3506  If there isn't enough space in the offset vector, treat this as if it were a
3507  non-capturing bracket. Don't worry about setting the flag for the error case
3508  here; that is handled in the code for KET. */
3509
3510  if (op > OP_BRA)
3511    {
3512    int offset;
3513    int number = op - OP_BRA;
3514
3515    /* For extended extraction brackets (large number), we have to fish out the
3516    number from a dummy opcode at the start. */
3517
3518    if (number > EXTRACT_BASIC_MAX) number = (ecode[4] << 8) | ecode[5];
3519    offset = number << 1;
3520
3521#ifdef DEBUG
3522    printf("start bracket %d subject=", number);
3523    pchars(eptr, 16, TRUE, md);
3524    printf("\n");
3525#endif
3526
3527    if (offset < md->offset_max)
3528      {
3529      int save_offset1 = md->offset_vector[offset];
3530      int save_offset2 = md->offset_vector[offset+1];
3531      int save_offset3 = md->offset_vector[md->offset_end - number];
3532
3533      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
3534      md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
3535
3536      do
3537        {
3538        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3539          return TRUE;
3540        ecode += (ecode[1] << 8) + ecode[2];
3541        }
3542      while (*ecode == OP_ALT);
3543
3544      DPRINTF(("bracket %d failed\n", number));
3545
3546      md->offset_vector[offset] = save_offset1;
3547      md->offset_vector[offset+1] = save_offset2;
3548      md->offset_vector[md->offset_end - number] = save_offset3;
3549
3550      return FALSE;
3551      }
3552
3553    /* Insufficient room for saving captured contents */
3554
3555    else op = OP_BRA;
3556    }
3557
3558  /* Other types of node can be handled by a switch */
3559
3560  switch(op)
3561    {
3562    case OP_BRA:     /* Non-capturing bracket: optimized */
3563    DPRINTF(("start bracket 0\n"));
3564    do
3565      {
3566      if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3567        return TRUE;
3568      ecode += (ecode[1] << 8) + ecode[2];
3569      }
3570    while (*ecode == OP_ALT);
3571    DPRINTF(("bracket 0 failed\n"));
3572    return FALSE;
3573
3574    /* Conditional group: compilation checked that there are no more than
3575    two branches. If the condition is false, skipping the first branch takes us
3576    past the end if there is only one branch, but that's OK because that is
3577    exactly what going to the ket would do. */
3578
3579    case OP_COND:
3580    if (ecode[3] == OP_CREF)         /* Condition is extraction test */
3581      {
3582      int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled ref number */
3583      return match(eptr,
3584        ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3585          6 : 3 + (ecode[1] << 8) + ecode[2]),
3586        offset_top, md, ims, eptrb, match_isgroup);
3587      }
3588
3589    /* The condition is an assertion. Call match() to evaluate it - setting
3590    match_condassert in flags makes it stop at the end of an assertion. */
3591
3592    else
3593      {
3594      if (match(eptr, ecode+3, offset_top, md, ims, NULL,
3595          match_condassert | match_isgroup))
3596        {
3597        ecode += 3 + (ecode[4] << 8) + ecode[5];
3598        while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3599        }
3600      else ecode += (ecode[1] << 8) + ecode[2];
3601      return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup);
3602      }
3603    /* Control never reaches here */
3604
3605    /* Skip over conditional reference or large extraction number data if
3606    encountered. */
3607
3608    case OP_CREF:
3609    case OP_BRANUMBER:
3610    ecode += 3;
3611    break;
3612
3613    /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3614    an empty string - recursion will then try other alternatives, if any. */
3615
3616    case OP_END:
3617    if (md->notempty && eptr == md->start_match) return FALSE;
3618    md->end_match_ptr = eptr;          /* Record where we ended */
3619    md->end_offset_top = offset_top;   /* and how many extracts were taken */
3620    return TRUE;
3621
3622    /* Change option settings */
3623
3624    case OP_OPT:
3625    ims = ecode[1];
3626    ecode += 2;
3627    DPRINTF(("ims set to %02lx\n", ims));
3628    break;
3629
3630    /* Assertion brackets. Check the alternative branches in turn - the
3631    matching won't pass the KET for an assertion. If any one branch matches,
3632    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3633    start of each branch to move the current point backwards, so the code at
3634    this level is identical to the lookahead case. */
3635
3636    case OP_ASSERT:
3637    case OP_ASSERTBACK:
3638    do
3639      {
3640      if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break;
3641      ecode += (ecode[1] << 8) + ecode[2];
3642      }
3643    while (*ecode == OP_ALT);
3644    if (*ecode == OP_KET) return FALSE;
3645
3646    /* If checking an assertion for a condition, return TRUE. */
3647
3648    if ((flags & match_condassert) != 0) return TRUE;
3649
3650    /* Continue from after the assertion, updating the offsets high water
3651    mark, since extracts may have been taken during the assertion. */
3652
3653    do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3654    ecode += 3;
3655    offset_top = md->end_offset_top;
3656    continue;
3657
3658    /* Negative assertion: all branches must fail to match */
3659
3660    case OP_ASSERT_NOT:
3661    case OP_ASSERTBACK_NOT:
3662    do
3663      {
3664      if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup))
3665        return FALSE;
3666      ecode += (ecode[1] << 8) + ecode[2];
3667      }
3668    while (*ecode == OP_ALT);
3669
3670    if ((flags & match_condassert) != 0) return TRUE;
3671
3672    ecode += 3;
3673    continue;
3674
3675    /* Move the subject pointer back. This occurs only at the start of
3676    each branch of a lookbehind assertion. If we are too close to the start to
3677    move back, this match function fails. When working with UTF-8 we move
3678    back a number of characters, not bytes. */
3679
3680    case OP_REVERSE:
3681#ifdef SUPPORT_UTF8
3682    c = (ecode[1] << 8) + ecode[2];
3683    for (i = 0; i < c; i++)
3684      {
3685      eptr--;
3686      BACKCHAR(eptr)
3687      }
3688#else
3689    eptr -= (ecode[1] << 8) + ecode[2];
3690#endif
3691
3692    if (eptr < md->start_subject) return FALSE;
3693    ecode += 3;
3694    break;
3695
3696    /* Recursion matches the current regex, nested. If there are any capturing
3697    brackets started but not finished, we have to save their starting points
3698    and reinstate them after the recursion. However, we don't know how many
3699    such there are (offset_top records the completed total) so we just have
3700    to save all the potential data. There may be up to 99 such values, which
3701    is a bit large to put on the stack, but using malloc for small numbers
3702    seems expensive. As a compromise, the stack is used when there are fewer
3703    than 16 values to store; otherwise malloc is used. A problem is what to do
3704    if the malloc fails ... there is no way of returning to the top level with
3705    an error. Save the top 15 values on the stack, and accept that the rest
3706    may be wrong. */
3707
3708    case OP_RECURSE:
3709      {
3710      BOOL rc;
3711      int *save;
3712      int stacksave[15];
3713
3714      c = md->offset_max;
3715
3716      if (c < 16) save = stacksave; else
3717        {
3718        save = (int *)(pcre_malloc)((c+1) * sizeof(int));
3719        if (save == NULL)
3720          {
3721          save = stacksave;
3722          c = 15;
3723          }
3724        }
3725
3726      for (i = 1; i <= c; i++)
3727        save[i] = md->offset_vector[md->offset_end - i];
3728      rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb,
3729        match_isgroup);
3730      for (i = 1; i <= c; i++)
3731        md->offset_vector[md->offset_end - i] = save[i];
3732      if (save != stacksave) (pcre_free)(save);
3733      if (!rc) return FALSE;
3734
3735      /* In case the recursion has set more capturing values, save the final
3736      number, then move along the subject till after the recursive match,
3737      and advance one byte in the pattern code. */
3738
3739      offset_top = md->end_offset_top;
3740      eptr = md->end_match_ptr;
3741      ecode++;
3742      }
3743    break;
3744
3745    /* "Once" brackets are like assertion brackets except that after a match,
3746    the point in the subject string is not moved back. Thus there can never be
3747    a move back into the brackets. Check the alternative branches in turn - the
3748    matching won't pass the KET for this kind of subpattern. If any one branch
3749    matches, we carry on as at the end of a normal bracket, leaving the subject
3750    pointer. */
3751
3752    case OP_ONCE:
3753      {
3754      const uschar *prev = ecode;
3755      const uschar *saved_eptr = eptr;
3756
3757      do
3758        {
3759        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup))
3760          break;
3761        ecode += (ecode[1] << 8) + ecode[2];
3762        }
3763      while (*ecode == OP_ALT);
3764
3765      /* If hit the end of the group (which could be repeated), fail */
3766
3767      if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3768
3769      /* Continue as from after the assertion, updating the offsets high water
3770      mark, since extracts may have been taken. */
3771
3772      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3773
3774      offset_top = md->end_offset_top;
3775      eptr = md->end_match_ptr;
3776
3777      /* For a non-repeating ket, just continue at this level. This also
3778      happens for a repeating ket if no characters were matched in the group.
3779      This is the forcible breaking of infinite loops as implemented in Perl
3780      5.005. If there is an options reset, it will get obeyed in the normal
3781      course of events. */
3782
3783      if (*ecode == OP_KET || eptr == saved_eptr)
3784        {
3785        ecode += 3;
3786        break;
3787        }
3788
3789      /* The repeating kets try the rest of the pattern or restart from the
3790      preceding bracket, in the appropriate order. We need to reset any options
3791      that changed within the bracket before re-running it, so check the next
3792      opcode. */
3793
3794      if (ecode[3] == OP_OPT)
3795        {
3796        ims = (ims & ~PCRE_IMS) | ecode[4];
3797        DPRINTF(("ims set to %02lx at group repeat\n", ims));
3798        }
3799
3800      if (*ecode == OP_KETRMIN)
3801        {
3802        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3803            match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3804              return TRUE;
3805        }
3806      else  /* OP_KETRMAX */
3807        {
3808        if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3809            match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3810        }
3811      }
3812    return FALSE;
3813
3814    /* An alternation is the end of a branch; scan along to find the end of the
3815    bracketed group and go to there. */
3816
3817    case OP_ALT:
3818    do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3819    break;
3820
3821    /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
3822    that it may occur zero times. It may repeat infinitely, or not at all -
3823    i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
3824    repeat limits are compiled as a number of copies, with the optional ones
3825    preceded by BRAZERO or BRAMINZERO. */
3826
3827    case OP_BRAZERO:
3828      {
3829      const uschar *next = ecode+1;
3830      if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup))
3831        return TRUE;
3832      do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3833      ecode = next + 3;
3834      }
3835    break;
3836
3837    case OP_BRAMINZERO:
3838      {
3839      const uschar *next = ecode+1;
3840      do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3841      if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup))
3842        return TRUE;
3843      ecode++;
3844      }
3845    break;
3846
3847    /* End of a group, repeated or non-repeating. If we are at the end of
3848    an assertion "group", stop matching and return TRUE, but record the
3849    current high water mark for use by positive assertions. Do this also
3850    for the "once" (non-backing-up) groups. */
3851
3852    case OP_KET:
3853    case OP_KETRMIN:
3854    case OP_KETRMAX:
3855      {
3856      const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3857      const uschar *saved_eptr = eptrb->saved_eptr;
3858
3859      eptrb = eptrb->prev;    /* Back up the stack of bracket start pointers */
3860
3861      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3862          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3863          *prev == OP_ONCE)
3864        {
3865        md->end_match_ptr = eptr;      /* For ONCE */
3866        md->end_offset_top = offset_top;
3867        return TRUE;
3868        }
3869
3870      /* In all other cases except a conditional group we have to check the
3871      group number back at the start and if necessary complete handling an
3872      extraction by setting the offsets and bumping the high water mark. */
3873
3874      if (*prev != OP_COND)
3875        {
3876        int offset;
3877        int number = *prev - OP_BRA;
3878
3879        /* For extended extraction brackets (large number), we have to fish out
3880        the number from a dummy opcode at the start. */
3881
3882        if (number > EXTRACT_BASIC_MAX) number = (prev[4] << 8) | prev[5];
3883        offset = number << 1;
3884
3885#ifdef DEBUG
3886        printf("end bracket %d", number);
3887        printf("\n");
3888#endif
3889
3890        if (number > 0)
3891          {
3892          if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3893            {
3894            md->offset_vector[offset] =
3895              md->offset_vector[md->offset_end - number];
3896            md->offset_vector[offset+1] = eptr - md->start_subject;
3897            if (offset_top <= offset) offset_top = offset + 2;
3898            }
3899          }
3900        }
3901
3902      /* Reset the value of the ims flags, in case they got changed during
3903      the group. */
3904
3905      ims = original_ims;
3906      DPRINTF(("ims reset to %02lx\n", ims));
3907
3908      /* For a non-repeating ket, just continue at this level. This also
3909      happens for a repeating ket if no characters were matched in the group.
3910      This is the forcible breaking of infinite loops as implemented in Perl
3911      5.005. If there is an options reset, it will get obeyed in the normal
3912      course of events. */
3913
3914      if (*ecode == OP_KET || eptr == saved_eptr)
3915        {
3916        ecode += 3;
3917        break;
3918        }
3919
3920      /* The repeating kets try the rest of the pattern or restart from the
3921      preceding bracket, in the appropriate order. */
3922
3923      if (*ecode == OP_KETRMIN)
3924        {
3925        if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) ||
3926            match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
3927              return TRUE;
3928        }
3929      else  /* OP_KETRMAX */
3930        {
3931        if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
3932            match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE;
3933        }
3934      }
3935    return FALSE;
3936
3937    /* Start of subject unless notbol, or after internal newline if multiline */
3938
3939    case OP_CIRC:
3940    if (md->notbol && eptr == md->start_subject) return FALSE;
3941    if ((ims & PCRE_MULTILINE) != 0)
3942      {
3943      if (eptr != md->start_subject && eptr[-1] != NEWLINE) return FALSE;
3944      ecode++;
3945      break;
3946      }
3947    /* ... else fall through */
3948
3949    /* Start of subject assertion */
3950
3951    case OP_SOD:
3952    if (eptr != md->start_subject) return FALSE;
3953    ecode++;
3954    break;
3955
3956    /* Assert before internal newline if multiline, or before a terminating
3957    newline unless endonly is set, else end of subject unless noteol is set. */
3958
3959    case OP_DOLL:
3960    if ((ims & PCRE_MULTILINE) != 0)
3961      {
3962      if (eptr < md->end_subject) { if (*eptr != NEWLINE) return FALSE; }
3963        else { if (md->noteol) return FALSE; }
3964      ecode++;
3965      break;
3966      }
3967    else
3968      {
3969      if (md->noteol) return FALSE;
3970      if (!md->endonly)
3971        {
3972        if (eptr < md->end_subject - 1 ||
3973           (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE;
3974
3975        ecode++;
3976        break;
3977        }
3978      }
3979    /* ... else fall through */
3980
3981    /* End of subject assertion (\z) */
3982
3983    case OP_EOD:
3984    if (eptr < md->end_subject) return FALSE;
3985    ecode++;
3986    break;
3987
3988    /* End of subject or ending \n assertion (\Z) */
3989
3990    case OP_EODN:
3991    if (eptr < md->end_subject - 1 ||
3992       (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE;
3993    ecode++;
3994    break;
3995
3996    /* Word boundary assertions */
3997
3998    case OP_NOT_WORD_BOUNDARY:
3999    case OP_WORD_BOUNDARY:
4000      {
4001      BOOL prev_is_word = (eptr != md->start_subject) &&
4002        ((md->ctypes[eptr[-1]] & ctype_word) != 0);
4003      BOOL cur_is_word = (eptr < md->end_subject) &&
4004        ((md->ctypes[*eptr] & ctype_word) != 0);
4005      if ((*ecode++ == OP_WORD_BOUNDARY)?
4006           cur_is_word == prev_is_word : cur_is_word != prev_is_word)
4007        return FALSE;
4008      }
4009    break;
4010
4011    /* Match a single character type; inline for speed */
4012
4013    case OP_ANY:
4014    if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
4015      return FALSE;
4016    if (eptr++ >= md->end_subject) return FALSE;
4017#ifdef SUPPORT_UTF8
4018    if (md->utf8)
4019      while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4020#endif
4021    ecode++;
4022    break;
4023
4024    case OP_NOT_DIGIT:
4025    if (eptr >= md->end_subject ||
4026       (md->ctypes[*eptr++] & ctype_digit) != 0)
4027      return FALSE;
4028    ecode++;
4029    break;
4030
4031    case OP_DIGIT:
4032    if (eptr >= md->end_subject ||
4033       (md->ctypes[*eptr++] & ctype_digit) == 0)
4034      return FALSE;
4035    ecode++;
4036    break;
4037
4038    case OP_NOT_WHITESPACE:
4039    if (eptr >= md->end_subject ||
4040       (md->ctypes[*eptr++] & ctype_space) != 0)
4041      return FALSE;
4042    ecode++;
4043    break;
4044
4045    case OP_WHITESPACE:
4046    if (eptr >= md->end_subject ||
4047       (md->ctypes[*eptr++] & ctype_space) == 0)
4048      return FALSE;
4049    ecode++;
4050    break;
4051
4052    case OP_NOT_WORDCHAR:
4053    if (eptr >= md->end_subject ||
4054       (md->ctypes[*eptr++] & ctype_word) != 0)
4055      return FALSE;
4056    ecode++;
4057    break;
4058
4059    case OP_WORDCHAR:
4060    if (eptr >= md->end_subject ||
4061       (md->ctypes[*eptr++] & ctype_word) == 0)
4062      return FALSE;
4063    ecode++;
4064    break;
4065
4066    /* Match a back reference, possibly repeatedly. Look past the end of the
4067    item to see if there is repeat information following. The code is similar
4068    to that for character classes, but repeated for efficiency. Then obey
4069    similar code to character type repeats - written out again for speed.
4070    However, if the referenced string is the empty string, always treat
4071    it as matched, any number of times (otherwise there could be infinite
4072    loops). */
4073
4074    case OP_REF:
4075      {
4076      int length;
4077      int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled ref number */
4078      ecode += 3;                                     /* Advance past item */
4079
4080      /* If the reference is unset, set the length to be longer than the amount
4081      of subject left; this ensures that every attempt at a match fails. We
4082      can't just fail here, because of the possibility of quantifiers with zero
4083      minima. */
4084
4085      length = (offset >= offset_top || md->offset_vector[offset] < 0)?
4086        md->end_subject - eptr + 1 :
4087        md->offset_vector[offset+1] - md->offset_vector[offset];
4088
4089      /* Set up for repetition, or handle the non-repeated case */
4090
4091      switch (*ecode)
4092        {
4093        case OP_CRSTAR:
4094        case OP_CRMINSTAR:
4095        case OP_CRPLUS:
4096        case OP_CRMINPLUS:
4097        case OP_CRQUERY:
4098        case OP_CRMINQUERY:
4099        c = *ecode++ - OP_CRSTAR;
4100        minimize = (c & 1) != 0;
4101        min = rep_min[c];                 /* Pick up values from tables; */
4102        max = rep_max[c];                 /* zero for max => infinity */
4103        if (max == 0) max = INT_MAX;
4104        break;
4105
4106        case OP_CRRANGE:
4107        case OP_CRMINRANGE:
4108        minimize = (*ecode == OP_CRMINRANGE);
4109        min = (ecode[1] << 8) + ecode[2];
4110        max = (ecode[3] << 8) + ecode[4];
4111        if (max == 0) max = INT_MAX;
4112        ecode += 5;
4113        break;
4114
4115        default:               /* No repeat follows */
4116        if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
4117        eptr += length;
4118        continue;              /* With the main loop */
4119        }
4120
4121      /* If the length of the reference is zero, just continue with the
4122      main loop. */
4123
4124      if (length == 0) continue;
4125
4126      /* First, ensure the minimum number of matches are present. We get back
4127      the length of the reference string explicitly rather than passing the
4128      address of eptr, so that eptr can be a register variable. */
4129
4130      for (i = 1; i <= min; i++)
4131        {
4132        if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
4133        eptr += length;
4134        }
4135
4136      /* If min = max, continue at the same level without recursion.
4137      They are not both allowed to be zero. */
4138
4139      if (min == max) continue;
4140
4141      /* If minimizing, keep trying and advancing the pointer */
4142
4143      if (minimize)
4144        {
4145        for (i = min;; i++)
4146          {
4147          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4148            return TRUE;
4149          if (i >= max || !match_ref(offset, eptr, length, md, ims))
4150            return FALSE;
4151          eptr += length;
4152          }
4153        /* Control never gets here */
4154        }
4155
4156      /* If maximizing, find the longest string and work backwards */
4157
4158      else
4159        {
4160        const uschar *pp = eptr;
4161        for (i = min; i < max; i++)
4162          {
4163          if (!match_ref(offset, eptr, length, md, ims)) break;
4164          eptr += length;
4165          }
4166        while (eptr >= pp)
4167          {
4168          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4169            return TRUE;
4170          eptr -= length;
4171          }
4172        return FALSE;
4173        }
4174      }
4175    /* Control never gets here */
4176
4177
4178
4179    /* Match a character class, possibly repeatedly. Look past the end of the
4180    item to see if there is repeat information following. Then obey similar
4181    code to character type repeats - written out again for speed. */
4182
4183    case OP_CLASS:
4184      {
4185      const uschar *data = ecode + 1;  /* Save for matching */
4186      ecode += 33;                     /* Advance past the item */
4187
4188      switch (*ecode)
4189        {
4190        case OP_CRSTAR:
4191        case OP_CRMINSTAR:
4192        case OP_CRPLUS:
4193        case OP_CRMINPLUS:
4194        case OP_CRQUERY:
4195        case OP_CRMINQUERY:
4196        c = *ecode++ - OP_CRSTAR;
4197        minimize = (c & 1) != 0;
4198        min = rep_min[c];                 /* Pick up values from tables; */
4199        max = rep_max[c];                 /* zero for max => infinity */
4200        if (max == 0) max = INT_MAX;
4201        break;
4202
4203        case OP_CRRANGE:
4204        case OP_CRMINRANGE:
4205        minimize = (*ecode == OP_CRMINRANGE);
4206        min = (ecode[1] << 8) + ecode[2];
4207        max = (ecode[3] << 8) + ecode[4];
4208        if (max == 0) max = INT_MAX;
4209        ecode += 5;
4210        break;
4211
4212        default:               /* No repeat follows */
4213        min = max = 1;
4214        break;
4215        }
4216
4217      /* First, ensure the minimum number of matches are present. */
4218
4219      for (i = 1; i <= min; i++)
4220        {
4221        if (eptr >= md->end_subject) return FALSE;
4222        GETCHARINC(c, eptr)         /* Get character; increment eptr */
4223
4224#ifdef SUPPORT_UTF8
4225        /* We do not yet support class members > 255 */
4226        if (c > 255) return FALSE;
4227#endif
4228
4229        if ((data[c/8] & (1 << (c&7))) != 0) continue;
4230        return FALSE;
4231        }
4232
4233      /* If max == min we can continue with the main loop without the
4234      need to recurse. */
4235
4236      if (min == max) continue;
4237
4238      /* If minimizing, keep testing the rest of the expression and advancing
4239      the pointer while it matches the class. */
4240
4241      if (minimize)
4242        {
4243        for (i = min;; i++)
4244          {
4245          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4246            return TRUE;
4247          if (i >= max || eptr >= md->end_subject) return FALSE;
4248          GETCHARINC(c, eptr)       /* Get character; increment eptr */
4249
4250#ifdef SUPPORT_UTF8
4251          /* We do not yet support class members > 255 */
4252          if (c > 255) return FALSE;
4253#endif
4254          if ((data[c/8] & (1 << (c&7))) != 0) continue;
4255          return FALSE;
4256          }
4257        /* Control never gets here */
4258        }
4259
4260      /* If maximizing, find the longest possible run, then work backwards. */
4261
4262      else
4263        {
4264        const uschar *pp = eptr;
4265        int len = 1;
4266        for (i = min; i < max; i++)
4267          {
4268          if (eptr >= md->end_subject) break;
4269          GETCHARLEN(c, eptr, len)  /* Get character, set length if UTF-8 */
4270
4271#ifdef SUPPORT_UTF8
4272          /* We do not yet support class members > 255 */
4273          if (c > 255) break;
4274#endif
4275          if ((data[c/8] & (1 << (c&7))) == 0) break;
4276          eptr += len;
4277          }
4278
4279        while (eptr >= pp)
4280          {
4281          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4282            return TRUE;
4283
4284#ifdef SUPPORT_UTF8
4285          BACKCHAR(eptr)
4286#endif
4287          }
4288        return FALSE;
4289        }
4290      }
4291    /* Control never gets here */
4292
4293    /* Match a run of characters */
4294
4295    case OP_CHARS:
4296      {
4297      register int length = ecode[1];
4298      ecode += 2;
4299
4300#ifdef DEBUG    /* Sigh. Some compilers never learn. */
4301      if (eptr >= md->end_subject)
4302        printf("matching subject <null> against pattern ");
4303      else
4304        {
4305        printf("matching subject ");
4306        pchars(eptr, length, TRUE, md);
4307        printf(" against pattern ");
4308        }
4309      pchars(ecode, length, FALSE, md);
4310      printf("\n");
4311#endif
4312
4313      if (length > md->end_subject - eptr) return FALSE;
4314      if ((ims & PCRE_CASELESS) != 0)
4315        {
4316        while (length-- > 0)
4317          if (md->lcc[*ecode++] != md->lcc[*eptr++])
4318            return FALSE;
4319        }
4320      else
4321        {
4322        while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
4323        }
4324      }
4325    break;
4326
4327    /* Match a single character repeatedly; different opcodes share code. */
4328
4329    case OP_EXACT:
4330    min = max = (ecode[1] << 8) + ecode[2];
4331    ecode += 3;
4332    goto REPEATCHAR;
4333
4334    case OP_UPTO:
4335    case OP_MINUPTO:
4336    min = 0;
4337    max = (ecode[1] << 8) + ecode[2];
4338    minimize = *ecode == OP_MINUPTO;
4339    ecode += 3;
4340    goto REPEATCHAR;
4341
4342    case OP_STAR:
4343    case OP_MINSTAR:
4344    case OP_PLUS:
4345    case OP_MINPLUS:
4346    case OP_QUERY:
4347    case OP_MINQUERY:
4348    c = *ecode++ - OP_STAR;
4349    minimize = (c & 1) != 0;
4350    min = rep_min[c];                 /* Pick up values from tables; */
4351    max = rep_max[c];                 /* zero for max => infinity */
4352    if (max == 0) max = INT_MAX;
4353
4354    /* Common code for all repeated single-character matches. We can give
4355    up quickly if there are fewer than the minimum number of characters left in
4356    the subject. */
4357
4358    REPEATCHAR:
4359    if (min > md->end_subject - eptr) return FALSE;
4360    c = *ecode++;
4361
4362    /* The code is duplicated for the caseless and caseful cases, for speed,
4363    since matching characters is likely to be quite common. First, ensure the
4364    minimum number of matches are present. If min = max, continue at the same
4365    level without recursing. Otherwise, if minimizing, keep trying the rest of
4366    the expression and advancing one matching character if failing, up to the
4367    maximum. Alternatively, if maximizing, find the maximum number of
4368    characters and work backwards. */
4369
4370    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
4371      max, eptr));
4372
4373    if ((ims & PCRE_CASELESS) != 0)
4374      {
4375      c = md->lcc[c];
4376      for (i = 1; i <= min; i++)
4377        if (c != md->lcc[*eptr++]) return FALSE;
4378      if (min == max) continue;
4379      if (minimize)
4380        {
4381        for (i = min;; i++)
4382          {
4383          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4384            return TRUE;
4385          if (i >= max || eptr >= md->end_subject ||
4386              c != md->lcc[*eptr++])
4387            return FALSE;
4388          }
4389        /* Control never gets here */
4390        }
4391      else
4392        {
4393        const uschar *pp = eptr;
4394        for (i = min; i < max; i++)
4395          {
4396          if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
4397          eptr++;
4398          }
4399        while (eptr >= pp)
4400          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4401            return TRUE;
4402        return FALSE;
4403        }
4404      /* Control never gets here */
4405      }
4406
4407    /* Caseful comparisons */
4408
4409    else
4410      {
4411      for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
4412      if (min == max) continue;
4413      if (minimize)
4414        {
4415        for (i = min;; i++)
4416          {
4417          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4418            return TRUE;
4419          if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
4420          }
4421        /* Control never gets here */
4422        }
4423      else
4424        {
4425        const uschar *pp = eptr;
4426        for (i = min; i < max; i++)
4427          {
4428          if (eptr >= md->end_subject || c != *eptr) break;
4429          eptr++;
4430          }
4431        while (eptr >= pp)
4432         if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4433           return TRUE;
4434        return FALSE;
4435        }
4436      }
4437    /* Control never gets here */
4438
4439    /* Match a negated single character */
4440
4441    case OP_NOT:
4442    if (eptr >= md->end_subject) return FALSE;
4443    ecode++;
4444    if ((ims & PCRE_CASELESS) != 0)
4445      {
4446      if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
4447      }
4448    else
4449      {
4450      if (*ecode++ == *eptr++) return FALSE;
4451      }
4452    break;
4453
4454    /* Match a negated single character repeatedly. This is almost a repeat of
4455    the code for a repeated single character, but I haven't found a nice way of
4456    commoning these up that doesn't require a test of the positive/negative
4457    option for each character match. Maybe that wouldn't add very much to the
4458    time taken, but character matching *is* what this is all about... */
4459
4460    case OP_NOTEXACT:
4461    min = max = (ecode[1] << 8) + ecode[2];
4462    ecode += 3;
4463    goto REPEATNOTCHAR;
4464
4465    case OP_NOTUPTO:
4466    case OP_NOTMINUPTO:
4467    min = 0;
4468    max = (ecode[1] << 8) + ecode[2];
4469    minimize = *ecode == OP_NOTMINUPTO;
4470    ecode += 3;
4471    goto REPEATNOTCHAR;
4472
4473    case OP_NOTSTAR:
4474    case OP_NOTMINSTAR:
4475    case OP_NOTPLUS:
4476    case OP_NOTMINPLUS:
4477    case OP_NOTQUERY:
4478    case OP_NOTMINQUERY:
4479    c = *ecode++ - OP_NOTSTAR;
4480    minimize = (c & 1) != 0;
4481    min = rep_min[c];                 /* Pick up values from tables; */
4482    max = rep_max[c];                 /* zero for max => infinity */
4483    if (max == 0) max = INT_MAX;
4484
4485    /* Common code for all repeated negated single-character matches. We can
4486    give up quickly if there are fewer than the minimum number of characters
4487    left in the subject. */
4488
4489    REPEATNOTCHAR:
4490    if (min > md->end_subject - eptr) return FALSE;
4491    c = *ecode++;
4492
4493    /* The code is duplicated for the caseless and caseful cases, for speed,
4494    since matching characters is likely to be quite common. First, ensure the
4495    minimum number of matches are present. If min = max, continue at the same
4496    level without recursing. Otherwise, if minimizing, keep trying the rest of
4497    the expression and advancing one matching character if failing, up to the
4498    maximum. Alternatively, if maximizing, find the maximum number of
4499    characters and work backwards. */
4500
4501    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
4502      max, eptr));
4503
4504    if ((ims & PCRE_CASELESS) != 0)
4505      {
4506      c = md->lcc[c];
4507      for (i = 1; i <= min; i++)
4508        if (c == md->lcc[*eptr++]) return FALSE;
4509      if (min == max) continue;
4510      if (minimize)
4511        {
4512        for (i = min;; i++)
4513          {
4514          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4515            return TRUE;
4516          if (i >= max || eptr >= md->end_subject ||
4517              c == md->lcc[*eptr++])
4518            return FALSE;
4519          }
4520        /* Control never gets here */
4521        }
4522      else
4523        {
4524        const uschar *pp = eptr;
4525        for (i = min; i < max; i++)
4526          {
4527          if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
4528          eptr++;
4529          }
4530        while (eptr >= pp)
4531          if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4532            return TRUE;
4533        return FALSE;
4534        }
4535      /* Control never gets here */
4536      }
4537
4538    /* Caseful comparisons */
4539
4540    else
4541      {
4542      for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
4543      if (min == max) continue;
4544      if (minimize)
4545        {
4546        for (i = min;; i++)
4547          {
4548          if (match(eptr, ecode, offset_top, md, ims, eptrb, 0))
4549            return TRUE;
4550          if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
4551          }
4552        /* Control never gets here */
4553        }
4554      else
4555        {
4556        const uschar *pp = eptr;
4557        for (i = min; i < max; i++)
4558          {
4559          if (eptr >= md->end_subject || c == *eptr) break;
4560          eptr++;
4561          }
4562        while (eptr >= pp)
4563         if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4564           return TRUE;
4565        return FALSE;
4566        }
4567      }
4568    /* Control never gets here */
4569
4570    /* Match a single character type repeatedly; several different opcodes
4571    share code. This is very similar to the code for single characters, but we
4572    repeat it in the interests of efficiency. */
4573
4574    case OP_TYPEEXACT:
4575    min = max = (ecode[1] << 8) + ecode[2];
4576    minimize = TRUE;
4577    ecode += 3;
4578    goto REPEATTYPE;
4579
4580    case OP_TYPEUPTO:
4581    case OP_TYPEMINUPTO:
4582    min = 0;
4583    max = (ecode[1] << 8) + ecode[2];
4584    minimize = *ecode == OP_TYPEMINUPTO;
4585    ecode += 3;
4586    goto REPEATTYPE;
4587
4588    case OP_TYPESTAR:
4589    case OP_TYPEMINSTAR:
4590    case OP_TYPEPLUS:
4591    case OP_TYPEMINPLUS:
4592    case OP_TYPEQUERY:
4593    case OP_TYPEMINQUERY:
4594    c = *ecode++ - OP_TYPESTAR;
4595    minimize = (c & 1) != 0;
4596    min = rep_min[c];                 /* Pick up values from tables; */
4597    max = rep_max[c];                 /* zero for max => infinity */
4598    if (max == 0) max = INT_MAX;
4599
4600    /* Common code for all repeated single character type matches */
4601
4602    REPEATTYPE:
4603    ctype = *ecode++;      /* Code for the character type */
4604
4605    /* First, ensure the minimum number of matches are present. Use inline
4606    code for maximizing the speed, and do the type test once at the start
4607    (i.e. keep it out of the loop). Also we can test that there are at least
4608    the minimum number of bytes before we start, except when doing '.' in
4609    UTF8 mode. Leave the test in in all cases; in the special case we have
4610    to test after each character. */
4611
4612    if (min > md->end_subject - eptr) return FALSE;
4613    if (min > 0) switch(ctype)
4614      {
4615      case OP_ANY:
4616#ifdef SUPPORT_UTF8
4617      if (md->utf8)
4618        {
4619        for (i = 1; i <= min; i++)
4620          {
4621          if (eptr >= md->end_subject ||
4622             (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))
4623            return FALSE;
4624          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4625          }
4626        break;
4627        }
4628#endif
4629      /* Non-UTF8 can be faster */
4630      if ((ims & PCRE_DOTALL) == 0)
4631        { for (i = 1; i <= min; i++) if (*eptr++ == NEWLINE) return FALSE; }
4632      else eptr += min;
4633      break;
4634
4635      case OP_NOT_DIGIT:
4636      for (i = 1; i <= min; i++)
4637        if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
4638      break;
4639
4640      case OP_DIGIT:
4641      for (i = 1; i <= min; i++)
4642        if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
4643      break;
4644
4645      case OP_NOT_WHITESPACE:
4646      for (i = 1; i <= min; i++)
4647        if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
4648      break;
4649
4650      case OP_WHITESPACE:
4651      for (i = 1; i <= min; i++)
4652        if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
4653      break;
4654
4655      case OP_NOT_WORDCHAR:
4656      for (i = 1; i <= min; i++)
4657        if ((md->ctypes[*eptr++] & ctype_word) != 0)
4658          return FALSE;
4659      break;
4660
4661      case OP_WORDCHAR:
4662      for (i = 1; i <= min; i++)
4663        if ((md->ctypes[*eptr++] & ctype_word) == 0)
4664          return FALSE;
4665      break;
4666      }
4667
4668    /* If min = max, continue at the same level without recursing */
4669
4670    if (min == max) continue;
4671
4672    /* If minimizing, we have to test the rest of the pattern before each
4673    subsequent match. */
4674
4675    if (minimize)
4676      {
4677      for (i = min;; i++)
4678        {
4679        if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) return TRUE;
4680        if (i >= max || eptr >= md->end_subject) return FALSE;
4681
4682        c = *eptr++;
4683        switch(ctype)
4684          {
4685          case OP_ANY:
4686          if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return FALSE;
4687#ifdef SUPPORT_UTF8
4688          if (md->utf8)
4689            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4690#endif
4691          break;
4692
4693          case OP_NOT_DIGIT:
4694          if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4695          break;
4696
4697          case OP_DIGIT:
4698          if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4699          break;
4700
4701          case OP_NOT_WHITESPACE:
4702          if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4703          break;
4704
4705          case OP_WHITESPACE:
4706          if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4707          break;
4708
4709          case OP_NOT_WORDCHAR:
4710          if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4711          break;
4712
4713          case OP_WORDCHAR:
4714          if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4715          break;
4716          }
4717        }
4718      /* Control never gets here */
4719      }
4720
4721    /* If maximizing it is worth using inline code for speed, doing the type
4722    test once at the start (i.e. keep it out of the loop). */
4723
4724    else
4725      {
4726      const uschar *pp = eptr;
4727      switch(ctype)
4728        {
4729        case OP_ANY:
4730
4731        /* Special code is required for UTF8, but when the maximum is unlimited
4732        we don't need it. */
4733
4734#ifdef SUPPORT_UTF8
4735        if (md->utf8 && max < INT_MAX)
4736          {
4737          if ((ims & PCRE_DOTALL) == 0)
4738            {
4739            for (i = min; i < max; i++)
4740              {
4741              if (eptr >= md->end_subject || *eptr++ == NEWLINE) break;
4742              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4743              }
4744            }
4745          else
4746            {
4747            for (i = min; i < max; i++)
4748              {
4749              eptr++;
4750              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4751              }
4752            }
4753          break;
4754          }
4755#endif
4756        /* Non-UTF8 can be faster */
4757        if ((ims & PCRE_DOTALL) == 0)
4758          {
4759          for (i = min; i < max; i++)
4760            {
4761            if (eptr >= md->end_subject || *eptr == NEWLINE) break;
4762            eptr++;
4763            }
4764          }
4765        else
4766          {
4767          c = max - min;
4768          if (c > md->end_subject - eptr) c = md->end_subject - eptr;
4769          eptr += c;
4770          }
4771        break;
4772
4773        case OP_NOT_DIGIT:
4774        for (i = min; i < max; i++)
4775          {
4776          if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4777            break;
4778          eptr++;
4779          }
4780        break;
4781
4782        case OP_DIGIT:
4783        for (i = min; i < max; i++)
4784          {
4785          if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4786            break;
4787          eptr++;
4788          }
4789        break;
4790
4791        case OP_NOT_WHITESPACE:
4792        for (i = min; i < max; i++)
4793          {
4794          if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4795            break;
4796          eptr++;
4797          }
4798        break;
4799
4800        case OP_WHITESPACE:
4801        for (i = min; i < max; i++)
4802          {
4803          if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4804            break;
4805          eptr++;
4806          }
4807        break;
4808
4809        case OP_NOT_WORDCHAR:
4810        for (i = min; i < max; i++)
4811          {
4812          if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4813            break;
4814          eptr++;
4815          }
4816        break;
4817
4818        case OP_WORDCHAR:
4819        for (i = min; i < max; i++)
4820          {
4821          if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4822            break;
4823          eptr++;
4824          }
4825        break;
4826        }
4827
4828      while (eptr >= pp)
4829        {
4830        if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0))
4831          return TRUE;
4832#ifdef SUPPORT_UTF8
4833        if (md->utf8)
4834          while (eptr > pp && (*eptr & 0xc0) == 0x80) eptr--;
4835#endif
4836        }
4837      return FALSE;
4838      }
4839    /* Control never gets here */
4840
4841    /* There's been some horrible disaster. */
4842
4843    default:
4844    DPRINTF(("Unknown opcode %d\n", *ecode));
4845    md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
4846    return FALSE;
4847    }
4848
4849  /* Do not stick any code in here without much thought; it is assumed
4850  that "continue" in the code above comes out to here to repeat the main
4851  loop. */
4852
4853  }             /* End of main loop */
4854/* Control never reaches here */
4855}
4856
4857
4858
4859
4860/*************************************************
4861*         Execute a Regular Expression           *
4862*************************************************/
4863
4864/* This function applies a compiled re to a subject string and picks out
4865portions of the string if it matches. Two elements in the vector are set for
4866each substring: the offsets to the start and end of the substring.
4867
4868Arguments:
4869  external_re     points to the compiled expression
4870  external_extra  points to "hints" from pcre_study() or is NULL
4871  subject         points to the subject string
4872  length          length of subject string (may contain binary zeros)
4873  start_offset    where to start in the subject string
4874  options         option bits
4875  offsets         points to a vector of ints to be filled in with offsets
4876  offsetcount     the number of elements in the vector
4877
4878Returns:          > 0 => success; value is the number of elements filled in
4879                  = 0 => success, but offsets is not big enough
4880                   -1 => failed to match
4881                 < -1 => some kind of unexpected problem
4882*/
4883
4884int
4885pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4886  const char *subject, int length, int start_offset, int options, int *offsets,
4887  int offsetcount)
4888{
4889int resetcount, ocount;
4890int first_char = -1;
4891int req_char = -1;
4892int req_char2 = -1;
4893unsigned long int ims = 0;
4894match_data match_block;
4895const uschar *start_bits = NULL;
4896const uschar *start_match = (const uschar *)subject + start_offset;
4897const uschar *end_subject;
4898const uschar *req_char_ptr = start_match - 1;
4899const real_pcre *re = (const real_pcre *)external_re;
4900const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4901BOOL using_temporary_offsets = FALSE;
4902BOOL anchored;
4903BOOL startline;
4904
4905if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4906
4907if (re == NULL || subject == NULL ||
4908   (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4909if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4910
4911anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4912startline = (re->options & PCRE_STARTLINE) != 0;
4913
4914match_block.start_pattern = re->code;
4915match_block.start_subject = (const uschar *)subject;
4916match_block.end_subject = match_block.start_subject + length;
4917end_subject = match_block.end_subject;
4918
4919match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4920match_block.utf8 = (re->options & PCRE_UTF8) != 0;
4921
4922match_block.notbol = (options & PCRE_NOTBOL) != 0;
4923match_block.noteol = (options & PCRE_NOTEOL) != 0;
4924match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4925
4926match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4927
4928match_block.lcc = re->tables + lcc_offset;
4929match_block.ctypes = re->tables + ctypes_offset;
4930
4931/* The ims options can vary during the matching as a result of the presence
4932of (?ims) items in the pattern. They are kept in a local variable so that
4933restoring at the exit of a group is easy. */
4934
4935ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4936
4937/* If the expression has got more back references than the offsets supplied can
4938hold, we get a temporary bit of working store to use during the matching.
4939Otherwise, we can use the vector supplied, rounding down its size to a multiple
4940of 3. */
4941
4942ocount = offsetcount - (offsetcount % 3);
4943
4944if (re->top_backref > 0 && re->top_backref >= ocount/3)
4945  {
4946  ocount = re->top_backref * 3 + 3;
4947  match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4948  if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4949  using_temporary_offsets = TRUE;
4950  DPRINTF(("Got memory to hold back references\n"));
4951  }
4952else match_block.offset_vector = offsets;
4953
4954match_block.offset_end = ocount;
4955match_block.offset_max = (2*ocount)/3;
4956match_block.offset_overflow = FALSE;
4957
4958/* Compute the minimum number of offsets that we need to reset each time. Doing
4959this makes a huge difference to execution time when there aren't many brackets
4960in the pattern. */
4961
4962resetcount = 2 + re->top_bracket * 2;
4963if (resetcount > offsetcount) resetcount = ocount;
4964
4965/* Reset the working variable associated with each extraction. These should
4966never be used unless previously set, but they get saved and restored, and so we
4967initialize them to avoid reading uninitialized locations. */
4968
4969if (match_block.offset_vector != NULL)
4970  {
4971  register int *iptr = match_block.offset_vector + ocount;
4972  register int *iend = iptr - resetcount/2 + 1;
4973  while (--iptr >= iend) *iptr = -1;
4974  }
4975
4976/* Set up the first character to match, if available. The first_char value is
4977never set for an anchored regular expression, but the anchoring may be forced
4978at run time, so we have to test for anchoring. The first char may be unset for
4979an unanchored pattern, of course. If there's no first char and the pattern was
4980studied, there may be a bitmap of possible first characters. */
4981
4982if (!anchored)
4983  {
4984  if ((re->options & PCRE_FIRSTSET) != 0)
4985    {
4986    first_char = re->first_char;
4987    if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4988    }
4989  else
4990    if (!startline && extra != NULL &&
4991      (extra->options & PCRE_STUDY_MAPPED) != 0)
4992        start_bits = extra->start_bits;
4993  }
4994
4995/* For anchored or unanchored matches, there may be a "last known required
4996character" set. If the PCRE_CASELESS is set, implying that the match starts
4997caselessly, or if there are any changes of this flag within the regex, set up
4998both cases of the character. Otherwise set the two values the same, which will
4999avoid duplicate testing (which takes significant time). This covers the vast
5000majority of cases. It will be suboptimal when the case flag changes in a regex
5001and the required character in fact is caseful. */
5002
5003if ((re->options & PCRE_REQCHSET) != 0)
5004  {
5005  req_char = re->req_char;
5006  req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
5007    (re->tables + fcc_offset)[req_char] : req_char;
5008  }
5009
5010/* Loop for handling unanchored repeated matching attempts; for anchored regexs
5011the loop runs just once. */
5012
5013do
5014  {
5015  int rc;
5016  register int *iptr = match_block.offset_vector;
5017  register int *iend = iptr + resetcount;
5018
5019  /* Reset the maximum number of extractions we might see. */
5020
5021  while (iptr < iend) *iptr++ = -1;
5022
5023  /* Advance to a unique first char if possible */
5024
5025  if (first_char >= 0)
5026    {
5027    if ((ims & PCRE_CASELESS) != 0)
5028      while (start_match < end_subject &&
5029             match_block.lcc[*start_match] != first_char)
5030        start_match++;
5031    else
5032      while (start_match < end_subject && *start_match != first_char)
5033        start_match++;
5034    }
5035
5036  /* Or to just after \n for a multiline match if possible */
5037
5038  else if (startline)
5039    {
5040    if (start_match > match_block.start_subject + start_offset)
5041      {
5042      while (start_match < end_subject && start_match[-1] != NEWLINE)
5043        start_match++;
5044      }
5045    }
5046
5047  /* Or to a non-unique first char after study */
5048
5049  else if (start_bits != NULL)
5050    {
5051    while (start_match < end_subject)
5052      {
5053      register int c = *start_match;
5054      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
5055      }
5056    }
5057
5058#ifdef DEBUG  /* Sigh. Some compilers never learn. */
5059  printf(">>>> Match against: ");
5060  pchars(start_match, end_subject - start_match, TRUE, &match_block);
5061  printf("\n");
5062#endif
5063
5064  /* If req_char is set, we know that that character must appear in the subject
5065  for the match to succeed. If the first character is set, req_char must be
5066  later in the subject; otherwise the test starts at the match point. This
5067  optimization can save a huge amount of backtracking in patterns with nested
5068  unlimited repeats that aren't going to match. We don't know what the state of
5069  case matching may be when this character is hit, so test for it in both its
5070  cases if necessary. However, the different cased versions will not be set up
5071  unless PCRE_CASELESS was given or the casing state changes within the regex.
5072  Writing separate code makes it go faster, as does using an autoincrement and
5073  backing off on a match. */
5074
5075  if (req_char >= 0)
5076    {
5077    register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
5078
5079    /* We don't need to repeat the search if we haven't yet reached the
5080    place we found it at last time. */
5081
5082    if (p > req_char_ptr)
5083      {
5084      /* Do a single test if no case difference is set up */
5085
5086      if (req_char == req_char2)
5087        {
5088        while (p < end_subject)
5089          {
5090          if (*p++ == req_char) { p--; break; }
5091          }
5092        }
5093
5094      /* Otherwise test for either case */
5095
5096      else
5097        {
5098        while (p < end_subject)
5099          {
5100          register int pp = *p++;
5101          if (pp == req_char || pp == req_char2) { p--; break; }
5102          }
5103        }
5104
5105      /* If we can't find the required character, break the matching loop */
5106
5107      if (p >= end_subject) break;
5108
5109      /* If we have found the required character, save the point where we
5110      found it, so that we don't search again next time round the loop if
5111      the start hasn't passed this character yet. */
5112
5113      req_char_ptr = p;
5114      }
5115    }
5116
5117  /* When a match occurs, substrings will be set for all internal extractions;
5118  we just need to set up the whole thing as substring 0 before returning. If
5119  there were too many extractions, set the return code to zero. In the case
5120  where we had to get some local store to hold offsets for backreferences, copy
5121  those back references that we can. In this case there need not be overflow
5122  if certain parts of the pattern were not used. */
5123
5124  match_block.start_match = start_match;
5125  if (!match(start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
5126    continue;
5127
5128  /* Copy the offset information from temporary store if necessary */
5129
5130  if (using_temporary_offsets)
5131    {
5132    if (offsetcount >= 4)
5133      {
5134      memcpy(offsets + 2, match_block.offset_vector + 2,
5135        (offsetcount - 2) * sizeof(int));
5136      DPRINTF(("Copied offsets from temporary memory\n"));
5137      }
5138    if (match_block.end_offset_top > offsetcount)
5139      match_block.offset_overflow = TRUE;
5140
5141    DPRINTF(("Freeing temporary memory\n"));
5142    (pcre_free)(match_block.offset_vector);
5143    }
5144
5145  rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
5146
5147  if (offsetcount < 2) rc = 0; else
5148    {
5149    offsets[0] = start_match - match_block.start_subject;
5150    offsets[1] = match_block.end_match_ptr - match_block.start_subject;
5151    }
5152
5153  DPRINTF((">>>> returning %d\n", rc));
5154  return rc;
5155  }
5156
5157/* This "while" is the end of the "do" above */
5158
5159while (!anchored &&
5160       match_block.errorcode == PCRE_ERROR_NOMATCH &&
5161       start_match++ < end_subject);
5162
5163if (using_temporary_offsets)
5164  {
5165  DPRINTF(("Freeing temporary memory\n"));
5166  (pcre_free)(match_block.offset_vector);
5167  }
5168
5169DPRINTF((">>>> returning %d\n", match_block.errorcode));
5170
5171return match_block.errorcode;
5172}
5173
5174/* End of pcre.c */
Note: See TracBrowser for help on using the repository browser.