source: trunk/third/rx/rx/rxnfa.c @ 10430

Revision 10430, 18.5 KB checked in by ghudson, 27 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r10429, which included commits to RCS files with non-trunk default branches.
Line 
1/*      Copyright (C) 1995, 1996 Tom Lord
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 * GNU Library General Public License for more details.
12 *
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING.  If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19
20/*
21 * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
22 */
23
24
25#include "rxall.h"
26#include "rxnfa.h"
27
28
29
30#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
31
32
33/* {Low Level Data Structure Code}
34 */
35
36/* Constructs a new nfa node. */
37#ifdef __STDC__
38struct rx_nfa_state *
39rx_nfa_state (struct rx *rx)
40#else
41struct rx_nfa_state *
42rx_nfa_state (rx)
43     struct rx *rx;
44#endif
45{
46  struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
47  if (!n)
48    return 0;
49  rx_bzero ((char *)n, sizeof (*n));
50  n->next = rx->nfa_states;
51  rx->nfa_states = n;
52  return n;
53}
54
55
56#ifdef __STDC__
57static void
58rx_free_nfa_state (struct rx_nfa_state * n)
59#else
60static void
61rx_free_nfa_state (n)
62  struct rx_nfa_state * n;
63#endif
64{
65  free ((char *)n);
66}
67
68
69/* This adds an edge between two nodes, but doesn't initialize the
70 * edge label.
71 */
72
73#ifdef __STDC__
74struct rx_nfa_edge *
75rx_nfa_edge (struct rx *rx,
76             enum rx_nfa_etype type,
77             struct rx_nfa_state *start,
78             struct rx_nfa_state *dest)
79#else
80struct rx_nfa_edge *
81rx_nfa_edge (rx, type, start, dest)
82     struct rx *rx;
83     enum rx_nfa_etype type;
84     struct rx_nfa_state *start;
85     struct rx_nfa_state *dest;
86#endif
87{
88  struct rx_nfa_edge *e;
89  e = (struct rx_nfa_edge *)malloc (sizeof (*e));
90  if (!e)
91    return 0;
92  e->next = start->edges;
93  start->edges = e;
94  e->type = type;
95  e->dest = dest;
96  return e;
97}
98
99
100#ifdef __STDC__
101static void
102rx_free_nfa_edge (struct rx_nfa_edge * e)
103#else
104static void
105rx_free_nfa_edge (e)
106     struct rx_nfa_edge * e;
107#endif
108{
109  free ((char *)e);
110}
111
112
113/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
114 * of an NFA.  These are added to an nfa automaticly by eclose_nfa.
115 */ 
116
117#ifdef __STDC__
118static struct rx_possible_future *
119rx_possible_future (struct rx * rx,
120                 struct rx_se_list * effects)
121#else
122static struct rx_possible_future *
123rx_possible_future (rx, effects)
124     struct rx * rx;
125     struct rx_se_list * effects;
126#endif
127{
128  struct rx_possible_future *ec;
129  ec = (struct rx_possible_future *) malloc (sizeof (*ec));
130  if (!ec)
131    return 0;
132  ec->destset = 0;
133  ec->next = 0;
134  ec->effects = effects;
135  return ec;
136}
137
138
139#ifdef __STDC__
140static void
141rx_free_possible_future (struct rx_possible_future * pf)
142#else
143static void
144rx_free_possible_future (pf)
145     struct rx_possible_future * pf;
146#endif
147{
148  free ((char *)pf);
149}
150
151
152#ifdef __STDC__
153static void
154rx_free_nfa_graph (struct rx *rx)
155#else
156static void
157rx_free_nfa_graph (rx)
158     struct rx *rx;
159#endif
160{
161  while (rx->nfa_states)
162    {
163      while (rx->nfa_states->edges)
164        {
165          switch (rx->nfa_states->edges->type)
166            {
167            case ne_cset:
168              rx_free_cset (rx->nfa_states->edges->params.cset);
169              break;
170            default:
171              break;
172            }
173          {
174            struct rx_nfa_edge * e;
175            e = rx->nfa_states->edges;
176            rx->nfa_states->edges = rx->nfa_states->edges->next;
177            rx_free_nfa_edge (e);
178          }
179        } /* while (rx->nfa_states->edges) */
180      {
181        /* Iterate over the partial epsilon closures of rx->nfa_states */
182        struct rx_possible_future * pf = rx->nfa_states->futures;
183        while (pf)
184          {
185            struct rx_possible_future * pft = pf;
186            pf = pf->next;
187            rx_free_possible_future (pft);
188          }
189      }
190      {
191        struct rx_nfa_state *n;
192        n = rx->nfa_states;
193        rx->nfa_states = rx->nfa_states->next;
194        rx_free_nfa_state (n);
195      }
196    }
197}
198
199
200
201/* {Translating a Syntax Tree into an NFA}
202 *
203 */
204
205
206/* This is the Thompson regexp->nfa algorithm.
207 * It is modified to allow for `side-effect epsilons.'  Those are
208 * edges that are taken whenever a similarly placed epsilon edge
209 * would be, but which also imply that some side effect occurs
210 * when the edge is taken.
211 *
212 * Side effects are used to model parts of the pattern langauge
213 * that are not regular.
214 */
215
216#ifdef __STDC__
217int
218rx_build_nfa (struct rx *rx,
219              struct rexp_node *rexp,
220              struct rx_nfa_state **start,
221              struct rx_nfa_state **end)
222#else
223int
224rx_build_nfa (rx, rexp, start, end)
225     struct rx *rx;
226     struct rexp_node *rexp;
227     struct rx_nfa_state **start;
228     struct rx_nfa_state **end;
229#endif
230{
231  struct rx_nfa_edge *edge;
232
233  /* Start & end nodes may have been allocated by the caller. */
234  *start = *start ? *start : rx_nfa_state (rx);
235
236  if (!*start)
237    return 0;
238
239  if (!rexp)
240    {
241      *end = *start;
242      return 1;
243    }
244
245  *end = *end ? *end : rx_nfa_state (rx);
246
247  if (!*end)
248    {
249      rx_free_nfa_state (*start);
250      return 0;
251    }
252
253  switch (rexp->type)
254    {
255    case r_cset:
256      edge = rx_nfa_edge (rx, ne_cset, *start, *end);
257      (*start)->has_cset_edges = 1;
258      if (!edge)
259        return 0;
260      edge->params.cset = rx_copy_cset (rx->local_cset_size,
261                                        rexp->params.cset);
262      if (!edge->params.cset)
263        {
264          rx_free_nfa_edge (edge);
265          return 0;
266        }
267      return 1;
268
269    case r_string:
270      {
271        if (rexp->params.cstr.len == 1)
272          {
273            edge = rx_nfa_edge (rx, ne_cset, *start, *end);
274            (*start)->has_cset_edges = 1;
275            if (!edge)
276              return 0;
277            edge->params.cset = rx_cset (rx->local_cset_size);
278            if (!edge->params.cset)
279              {
280                rx_free_nfa_edge (edge);
281                return 0;
282              }
283            RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]);
284            return 1;
285          }
286        else
287          {
288            struct rexp_node copied;
289            struct rx_nfa_state * shared;
290
291            copied = *rexp;
292            shared = 0;
293            copied.params.cstr.len--;
294            copied.params.cstr.contents++;
295            if (!rx_build_nfa (rx, &copied, &shared, end))
296              return 0;
297            copied.params.cstr.len = 1;
298            copied.params.cstr.contents--;
299            return rx_build_nfa (rx, &copied, start, &shared);
300          }
301      }
302 
303    case r_opt:
304      return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
305              && rx_nfa_edge (rx, ne_epsilon, *start, *end));
306
307    case r_plus:
308      {
309        struct rx_nfa_state * star_start = 0;
310        struct rx_nfa_state * star_end = 0;
311        struct rx_nfa_state * shared;
312
313        shared = 0;
314        if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared))
315          return 0;
316        return (rx_build_nfa (rx, rexp->params.pair.left,
317                              &star_start, &star_end)
318                && star_start
319                && star_end
320                && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
321                && rx_nfa_edge (rx, ne_epsilon, shared, star_start)
322                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
323                && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
324      }
325
326    case r_interval:
327    case r_star:
328      {
329        struct rx_nfa_state * star_start = 0;
330        struct rx_nfa_state * star_end = 0;
331        return (rx_build_nfa (rx, rexp->params.pair.left,
332                              &star_start, &star_end)
333                && star_start
334                && star_end
335                && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
336                && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
337                && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
338
339                && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
340      }
341
342    case r_cut:
343      {
344        struct rx_nfa_state * cut_end = 0;
345
346        cut_end = rx_nfa_state (rx);
347        if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end)))
348          {
349            rx_free_nfa_state (*start);
350            rx_free_nfa_state (*end);
351            if (cut_end)
352              rx_free_nfa_state (cut_end);
353            return 0;
354          }
355        cut_end->is_final = rexp->params.intval;
356        return 1;
357      }
358
359    case r_parens:
360      return rx_build_nfa (rx, rexp->params.pair.left, start, end);
361
362    case r_concat:
363      {
364        struct rx_nfa_state *shared = 0;
365        return
366          (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
367           && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
368      }
369
370    case r_alternate:
371      {
372        struct rx_nfa_state *ls = 0;
373        struct rx_nfa_state *le = 0;
374        struct rx_nfa_state *rs = 0;
375        struct rx_nfa_state *re = 0;
376        return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
377                && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
378                && rx_nfa_edge (rx, ne_epsilon, *start, ls)
379                && rx_nfa_edge (rx, ne_epsilon, *start, rs)
380                && rx_nfa_edge (rx, ne_epsilon, le, *end)
381                && rx_nfa_edge (rx, ne_epsilon, re, *end));
382      }
383
384    case r_context:
385      edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
386      if (!edge)
387        return 0;
388      edge->params.side_effect = (void *)rexp->params.intval;
389      return 1;
390    }
391
392  /* this should never happen */
393  return 0;
394}
395
396
397/* {Low Level Data Structures for the Static NFA->SuperNFA Analysis}
398 *
399 * There are side effect lists -- lists of side effects occuring
400 * along an uninterrupted, acyclic path of side-effect epsilon edges.
401 * Such paths are collapsed to single edges in the course of computing
402 * epsilon closures.  The resulting single edges are labled with a list
403 * of all the side effects from the original multi-edge path.  Equivalent
404 * lists of side effects are made == by the constructors below.
405 *
406 * There are also nfa state sets.  These are used to hold a list of all
407 * states reachable from a starting state for a given type of transition
408 * and side effect list.   These are also hash-consed.
409 */
410
411
412
413/* The next several functions compare, construct, etc. lists of side
414 * effects.  See ECLOSE_NFA (below) for details.
415 */
416
417/* Ordering of rx_se_list
418 * (-1, 0, 1 return value convention).
419 */
420
421#ifdef __STDC__
422static int
423se_list_cmp (void * va, void * vb)
424#else
425static int
426se_list_cmp (va, vb)
427     void * va;
428     void * vb;
429#endif
430{
431  struct rx_se_list * a = (struct rx_se_list *)va;
432  struct rx_se_list * b = (struct rx_se_list *)vb;
433
434  return ((va == vb)
435          ? 0
436          : (!va
437             ? -1
438             : (!vb
439                ? 1
440                : ((long)a->car < (long)b->car
441                   ? 1
442                   : ((long)a->car > (long)b->car
443                      ? -1
444                      : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
445}
446
447
448#ifdef __STDC__
449static int
450se_list_equal (void * va, void * vb)
451#else
452static int
453se_list_equal (va, vb)
454     void * va;
455     void * vb;
456#endif
457{
458  return !(se_list_cmp (va, vb));
459}
460
461static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 };
462
463
464#ifdef __STDC__
465static struct rx_se_list *
466side_effect_cons (struct rx * rx,
467                  void * se, struct rx_se_list * list)
468#else
469static struct rx_se_list *
470side_effect_cons (rx, se, list)
471     struct rx * rx;
472     void * se;
473     struct rx_se_list * list;
474#endif
475{
476  struct rx_se_list * l;
477  l = ((struct rx_se_list *) malloc (sizeof (*l)));
478  if (!l)
479    return 0;
480  l->car = se;
481  l->cdr = list;
482  return l;
483}
484
485
486#ifdef __STDC__
487static struct rx_se_list *
488hash_cons_se_prog (struct rx * rx,
489                   struct rx_hash * memo,
490                   void * car, struct rx_se_list * cdr)
491#else
492static struct rx_se_list *
493hash_cons_se_prog (rx, memo, car, cdr)
494     struct rx * rx;
495     struct rx_hash * memo;
496     void * car;
497     struct rx_se_list * cdr;
498#endif
499{
500  long hash = (long)car ^ (long)cdr;
501  struct rx_se_list template;
502
503  template.car = car;
504  template.cdr = cdr;
505  {
506    struct rx_hash_item * it = rx_hash_store (memo, hash,
507                                              (void *)&template,
508                                              &se_list_hash_rules);
509    if (!it)
510      return 0;
511    if (it->data == (void *)&template)
512      {
513        struct rx_se_list * consed;
514        consed = (struct rx_se_list *) malloc (sizeof (*consed));
515        *consed = template;
516        it->data = (void *)consed;
517      }
518    return (struct rx_se_list *)it->data;
519  }
520}
521     
522
523#ifdef __STDC__
524static struct rx_se_list *
525hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
526#else
527static struct rx_se_list *
528hash_se_prog (rx, memo, prog)
529     struct rx * rx;
530     struct rx_hash * memo;
531     struct rx_se_list * prog;
532#endif
533{
534  struct rx_se_list * answer = 0;
535  while (prog)
536    {
537      answer = hash_cons_se_prog (rx, memo, prog->car, answer);
538      if (!answer)
539        return 0;
540      prog = prog->cdr;
541    }
542  return answer;
543}
544
545
546
547/* {Constructors, etc. for NFA State Sets}
548 */
549
550#ifdef __STDC__
551static int
552nfa_set_cmp (void * va, void * vb)
553#else
554static int
555nfa_set_cmp (va, vb)
556     void * va;
557     void * vb;
558#endif
559{
560  struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
561  struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
562
563  return ((va == vb)
564          ? 0
565          : (!va
566             ? -1
567             : (!vb
568                ? 1
569                : (a->car->id < b->car->id
570                   ? 1
571                   : (a->car->id > b->car->id
572                      ? -1
573                      : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
574}
575
576#ifdef __STDC__
577static int
578nfa_set_equal (void * va, void * vb)
579#else
580static int
581nfa_set_equal (va, vb)
582     void * va;
583     void * vb;
584#endif
585{
586  return !nfa_set_cmp (va, vb);
587}
588
589static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 };
590
591
592#ifdef __STDC__
593static struct rx_nfa_state_set *
594nfa_set_cons (struct rx * rx,
595              struct rx_hash * memo, struct rx_nfa_state * state,
596              struct rx_nfa_state_set * set)
597#else
598static struct rx_nfa_state_set *
599nfa_set_cons (rx, memo, state, set)
600     struct rx * rx;
601     struct rx_hash * memo;
602     struct rx_nfa_state * state;
603     struct rx_nfa_state_set * set;
604#endif
605{
606  struct rx_nfa_state_set template;
607  struct rx_hash_item * node;
608  template.car = state;
609  template.cdr = set;
610  node = rx_hash_store (memo,
611                        (((long)state) >> 8) ^ (long)set,
612                        &template, &nfa_set_hash_rules);
613  if (!node)
614    return 0;
615  if (node->data == &template)
616    {
617      struct rx_nfa_state_set * l;
618      l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
619      node->data = (void *) l;
620      if (!l)
621        return 0;
622      *l = template;
623    }
624  return (struct rx_nfa_state_set *)node->data;
625}
626
627
628#ifdef __STDC__
629static struct rx_nfa_state_set *
630nfa_set_enjoin (struct rx * rx,
631                struct rx_hash * memo, struct rx_nfa_state * state,
632                struct rx_nfa_state_set * set)
633#else
634static struct rx_nfa_state_set *
635nfa_set_enjoin (rx, memo, state, set)
636     struct rx * rx;
637     struct rx_hash * memo;
638     struct rx_nfa_state * state;
639     struct rx_nfa_state_set * set;
640#endif
641{
642  if (!set || (state->id < set->car->id))
643    return nfa_set_cons (rx, memo, state, set);
644  if (state->id == set->car->id)
645    return set;
646  else
647    {
648      struct rx_nfa_state_set * newcdr
649        = nfa_set_enjoin (rx, memo, state, set->cdr);
650      if (newcdr != set->cdr)
651        set = nfa_set_cons (rx, memo, set->car, newcdr);
652      return set;
653    }
654}
655
656
657/* {Computing Epsilon/Side-effect Closures.}
658 */
659
660struct eclose_frame
661{
662  struct rx_se_list *prog_backwards;
663};
664
665
666/* This is called while computing closures for "outnode".
667 * The current node in the traversal is "node".
668 * "frame" contains a list of a all side effects between
669 * "outnode" and "node" from most to least recent.
670 *
671 * Explores forward from "node", adding new possible
672 * futures to outnode.
673 *
674 * Returns 0 on allocation failure.
675 */
676
677#ifdef __STDC__
678static int
679eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
680             struct rx_nfa_state *node, struct eclose_frame *frame)
681#else
682static int
683eclose_node (rx, outnode, node, frame)
684     struct rx *rx;
685     struct rx_nfa_state *outnode;
686     struct rx_nfa_state *node;
687     struct eclose_frame *frame;
688#endif
689{
690  struct rx_nfa_edge *e = node->edges;
691
692  /* For each node, we follow all epsilon paths to build the closure.
693   * The closure omits nodes that have only epsilon edges.
694   * The closure is split into partial closures -- all the states in
695   * a partial closure are reached by crossing the same list of
696   * of side effects (though not necessarily the same path).
697   */
698  if (node->mark)
699    return 1;
700  node->mark = 1;
701
702  /* If "node" has more than just epsilon and
703   * and side-effect transitions (id >= 0), or is final,
704   * then it has to be added to the possible futures
705   * of "outnode".
706   */
707  if (node->id >= 0 || node->is_final)
708    {
709      struct rx_possible_future **ec;
710      struct rx_se_list * prog_in_order;
711      int cmp;
712
713      prog_in_order = ((struct rx_se_list *)hash_se_prog (rx,
714                                                          &rx->se_list_memo,
715                                                          frame->prog_backwards));
716
717      ec = &outnode->futures;
718
719      while (*ec)
720        {
721          cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
722          if (cmp <= 0)
723            break;
724          ec = &(*ec)->next;
725        }
726
727      if (!*ec || (cmp < 0))
728        {
729          struct rx_possible_future * pf;
730          pf = rx_possible_future (rx, prog_in_order);
731          if (!pf)
732            return 0;
733          pf->next = *ec;
734          *ec = pf;
735        }
736      if (node->id >= 0)
737        {
738          (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
739                                           node, (*ec)->destset);
740          if (!(*ec)->destset)
741            return 0;
742        }
743    }
744
745  /* Recurse on outgoing epsilon and side effect nodes.
746   */
747  while (e)
748    {
749      switch (e->type)
750        {
751        case ne_epsilon:
752          if (!eclose_node (rx, outnode, e->dest, frame))
753            return 0;
754          break;
755        case ne_side_effect:
756          {
757            frame->prog_backwards = side_effect_cons (rx,
758                                                      e->params.side_effect,
759                                                      frame->prog_backwards);
760            if (!frame->prog_backwards)
761              return 0;
762            if (!eclose_node (rx, outnode, e->dest, frame))
763              return 0;
764            {
765              struct rx_se_list * dying = frame->prog_backwards;
766              frame->prog_backwards = frame->prog_backwards->cdr;
767              free ((char *)dying);
768            }
769            break;
770          }
771        default:
772          break;
773        }
774      e = e->next;
775    }
776  node->mark = 0;
777  return 1;
778}
779
780
781#ifdef __STDC__
782struct rx_possible_future *
783rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n)
784#else
785struct rx_possible_future *
786rx_state_possible_futures (rx, n)
787     struct rx * rx;
788     struct rx_nfa_state * n;
789#endif
790{
791  if (n->futures_computed)
792    return n->futures;
793  else
794    {
795      struct eclose_frame frame;
796      frame.prog_backwards = 0;
797      if (!eclose_node (rx, n, n, &frame))
798        return 0;
799      else
800        {
801          n->futures_computed = 1;
802          return n->futures;
803        }
804    }
805}
806
807
808
809/* {Storing the NFA in a Contiguous Region of Memory}
810 */
811
812
813
814#ifdef __STDC__
815static void
816se_memo_freer (struct rx_hash_item * node)
817#else
818static void
819se_memo_freer (node)
820     struct rx_hash_item * node;
821#endif
822{
823  free ((char *)node->data);
824}
825
826
827#ifdef __STDC__
828static void
829nfa_set_freer (struct rx_hash_item * node)
830#else
831static void
832nfa_set_freer (node)
833     struct rx_hash_item * node;
834#endif
835{
836  free ((char *)node->data);
837}
838
839#ifdef __STDC__
840void
841rx_free_nfa (struct rx *rx)
842#else
843void
844rx_free_nfa (rx)
845     struct rx *rx;
846#endif
847{
848  rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
849  rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo));
850  rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
851  rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo));
852  rx_free_nfa_graph (rx);
853}
Note: See TracBrowser for help on using the repository browser.