1 | /* Copyright (C) 1995, 1996 Tom Lord |
---|
2 | * |
---|
3 | * This program is free software; you can redistribute it and/or modify |
---|
4 | * it under the terms of the GNU Library General Public License as published by |
---|
5 | * the Free Software Foundation; either version 2, or (at your option) |
---|
6 | * any later version. |
---|
7 | * |
---|
8 | * This program is distributed in the hope that it will be useful, |
---|
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
11 | * GNU Library General Public License for more details. |
---|
12 | * |
---|
13 | * You should have received a copy of the GNU Library General Public License |
---|
14 | * along with this software; see the file COPYING. If not, write to |
---|
15 | * the Free Software Foundation, 59 Temple Place - Suite 330, |
---|
16 | * Boston, MA 02111-1307, USA. |
---|
17 | */ |
---|
18 | |
---|
19 | |
---|
20 | /* |
---|
21 | * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu) |
---|
22 | */ |
---|
23 | |
---|
24 | |
---|
25 | #include "rxall.h" |
---|
26 | #include "rxnfa.h" |
---|
27 | |
---|
28 | |
---|
29 | |
---|
30 | #define remalloc(M, S) (M ? realloc (M, S) : malloc (S)) |
---|
31 | |
---|
32 | |
---|
33 | /* {Low Level Data Structure Code} |
---|
34 | */ |
---|
35 | |
---|
36 | /* Constructs a new nfa node. */ |
---|
37 | #ifdef __STDC__ |
---|
38 | struct rx_nfa_state * |
---|
39 | rx_nfa_state (struct rx *rx) |
---|
40 | #else |
---|
41 | struct rx_nfa_state * |
---|
42 | rx_nfa_state (rx) |
---|
43 | struct rx *rx; |
---|
44 | #endif |
---|
45 | { |
---|
46 | struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n)); |
---|
47 | if (!n) |
---|
48 | return 0; |
---|
49 | rx_bzero ((char *)n, sizeof (*n)); |
---|
50 | n->next = rx->nfa_states; |
---|
51 | rx->nfa_states = n; |
---|
52 | return n; |
---|
53 | } |
---|
54 | |
---|
55 | |
---|
56 | #ifdef __STDC__ |
---|
57 | static void |
---|
58 | rx_free_nfa_state (struct rx_nfa_state * n) |
---|
59 | #else |
---|
60 | static void |
---|
61 | rx_free_nfa_state (n) |
---|
62 | struct rx_nfa_state * n; |
---|
63 | #endif |
---|
64 | { |
---|
65 | free ((char *)n); |
---|
66 | } |
---|
67 | |
---|
68 | |
---|
69 | /* This adds an edge between two nodes, but doesn't initialize the |
---|
70 | * edge label. |
---|
71 | */ |
---|
72 | |
---|
73 | #ifdef __STDC__ |
---|
74 | struct rx_nfa_edge * |
---|
75 | rx_nfa_edge (struct rx *rx, |
---|
76 | enum rx_nfa_etype type, |
---|
77 | struct rx_nfa_state *start, |
---|
78 | struct rx_nfa_state *dest) |
---|
79 | #else |
---|
80 | struct rx_nfa_edge * |
---|
81 | rx_nfa_edge (rx, type, start, dest) |
---|
82 | struct rx *rx; |
---|
83 | enum rx_nfa_etype type; |
---|
84 | struct rx_nfa_state *start; |
---|
85 | struct rx_nfa_state *dest; |
---|
86 | #endif |
---|
87 | { |
---|
88 | struct rx_nfa_edge *e; |
---|
89 | e = (struct rx_nfa_edge *)malloc (sizeof (*e)); |
---|
90 | if (!e) |
---|
91 | return 0; |
---|
92 | e->next = start->edges; |
---|
93 | start->edges = e; |
---|
94 | e->type = type; |
---|
95 | e->dest = dest; |
---|
96 | return e; |
---|
97 | } |
---|
98 | |
---|
99 | |
---|
100 | #ifdef __STDC__ |
---|
101 | static void |
---|
102 | rx_free_nfa_edge (struct rx_nfa_edge * e) |
---|
103 | #else |
---|
104 | static void |
---|
105 | rx_free_nfa_edge (e) |
---|
106 | struct rx_nfa_edge * e; |
---|
107 | #endif |
---|
108 | { |
---|
109 | free ((char *)e); |
---|
110 | } |
---|
111 | |
---|
112 | |
---|
113 | /* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure |
---|
114 | * of an NFA. These are added to an nfa automaticly by eclose_nfa. |
---|
115 | */ |
---|
116 | |
---|
117 | #ifdef __STDC__ |
---|
118 | static struct rx_possible_future * |
---|
119 | rx_possible_future (struct rx * rx, |
---|
120 | struct rx_se_list * effects) |
---|
121 | #else |
---|
122 | static struct rx_possible_future * |
---|
123 | rx_possible_future (rx, effects) |
---|
124 | struct rx * rx; |
---|
125 | struct rx_se_list * effects; |
---|
126 | #endif |
---|
127 | { |
---|
128 | struct rx_possible_future *ec; |
---|
129 | ec = (struct rx_possible_future *) malloc (sizeof (*ec)); |
---|
130 | if (!ec) |
---|
131 | return 0; |
---|
132 | ec->destset = 0; |
---|
133 | ec->next = 0; |
---|
134 | ec->effects = effects; |
---|
135 | return ec; |
---|
136 | } |
---|
137 | |
---|
138 | |
---|
139 | #ifdef __STDC__ |
---|
140 | static void |
---|
141 | rx_free_possible_future (struct rx_possible_future * pf) |
---|
142 | #else |
---|
143 | static void |
---|
144 | rx_free_possible_future (pf) |
---|
145 | struct rx_possible_future * pf; |
---|
146 | #endif |
---|
147 | { |
---|
148 | free ((char *)pf); |
---|
149 | } |
---|
150 | |
---|
151 | |
---|
152 | #ifdef __STDC__ |
---|
153 | static void |
---|
154 | rx_free_nfa_graph (struct rx *rx) |
---|
155 | #else |
---|
156 | static void |
---|
157 | rx_free_nfa_graph (rx) |
---|
158 | struct rx *rx; |
---|
159 | #endif |
---|
160 | { |
---|
161 | while (rx->nfa_states) |
---|
162 | { |
---|
163 | while (rx->nfa_states->edges) |
---|
164 | { |
---|
165 | switch (rx->nfa_states->edges->type) |
---|
166 | { |
---|
167 | case ne_cset: |
---|
168 | rx_free_cset (rx->nfa_states->edges->params.cset); |
---|
169 | break; |
---|
170 | default: |
---|
171 | break; |
---|
172 | } |
---|
173 | { |
---|
174 | struct rx_nfa_edge * e; |
---|
175 | e = rx->nfa_states->edges; |
---|
176 | rx->nfa_states->edges = rx->nfa_states->edges->next; |
---|
177 | rx_free_nfa_edge (e); |
---|
178 | } |
---|
179 | } /* while (rx->nfa_states->edges) */ |
---|
180 | { |
---|
181 | /* Iterate over the partial epsilon closures of rx->nfa_states */ |
---|
182 | struct rx_possible_future * pf = rx->nfa_states->futures; |
---|
183 | while (pf) |
---|
184 | { |
---|
185 | struct rx_possible_future * pft = pf; |
---|
186 | pf = pf->next; |
---|
187 | rx_free_possible_future (pft); |
---|
188 | } |
---|
189 | } |
---|
190 | { |
---|
191 | struct rx_nfa_state *n; |
---|
192 | n = rx->nfa_states; |
---|
193 | rx->nfa_states = rx->nfa_states->next; |
---|
194 | rx_free_nfa_state (n); |
---|
195 | } |
---|
196 | } |
---|
197 | } |
---|
198 | |
---|
199 | |
---|
200 | |
---|
201 | /* {Translating a Syntax Tree into an NFA} |
---|
202 | * |
---|
203 | */ |
---|
204 | |
---|
205 | |
---|
206 | /* This is the Thompson regexp->nfa algorithm. |
---|
207 | * It is modified to allow for `side-effect epsilons.' Those are |
---|
208 | * edges that are taken whenever a similarly placed epsilon edge |
---|
209 | * would be, but which also imply that some side effect occurs |
---|
210 | * when the edge is taken. |
---|
211 | * |
---|
212 | * Side effects are used to model parts of the pattern langauge |
---|
213 | * that are not regular. |
---|
214 | */ |
---|
215 | |
---|
216 | #ifdef __STDC__ |
---|
217 | int |
---|
218 | rx_build_nfa (struct rx *rx, |
---|
219 | struct rexp_node *rexp, |
---|
220 | struct rx_nfa_state **start, |
---|
221 | struct rx_nfa_state **end) |
---|
222 | #else |
---|
223 | int |
---|
224 | rx_build_nfa (rx, rexp, start, end) |
---|
225 | struct rx *rx; |
---|
226 | struct rexp_node *rexp; |
---|
227 | struct rx_nfa_state **start; |
---|
228 | struct rx_nfa_state **end; |
---|
229 | #endif |
---|
230 | { |
---|
231 | struct rx_nfa_edge *edge; |
---|
232 | |
---|
233 | /* Start & end nodes may have been allocated by the caller. */ |
---|
234 | *start = *start ? *start : rx_nfa_state (rx); |
---|
235 | |
---|
236 | if (!*start) |
---|
237 | return 0; |
---|
238 | |
---|
239 | if (!rexp) |
---|
240 | { |
---|
241 | *end = *start; |
---|
242 | return 1; |
---|
243 | } |
---|
244 | |
---|
245 | *end = *end ? *end : rx_nfa_state (rx); |
---|
246 | |
---|
247 | if (!*end) |
---|
248 | { |
---|
249 | rx_free_nfa_state (*start); |
---|
250 | return 0; |
---|
251 | } |
---|
252 | |
---|
253 | switch (rexp->type) |
---|
254 | { |
---|
255 | case r_cset: |
---|
256 | edge = rx_nfa_edge (rx, ne_cset, *start, *end); |
---|
257 | (*start)->has_cset_edges = 1; |
---|
258 | if (!edge) |
---|
259 | return 0; |
---|
260 | edge->params.cset = rx_copy_cset (rx->local_cset_size, |
---|
261 | rexp->params.cset); |
---|
262 | if (!edge->params.cset) |
---|
263 | { |
---|
264 | rx_free_nfa_edge (edge); |
---|
265 | return 0; |
---|
266 | } |
---|
267 | return 1; |
---|
268 | |
---|
269 | case r_string: |
---|
270 | { |
---|
271 | if (rexp->params.cstr.len == 1) |
---|
272 | { |
---|
273 | edge = rx_nfa_edge (rx, ne_cset, *start, *end); |
---|
274 | (*start)->has_cset_edges = 1; |
---|
275 | if (!edge) |
---|
276 | return 0; |
---|
277 | edge->params.cset = rx_cset (rx->local_cset_size); |
---|
278 | if (!edge->params.cset) |
---|
279 | { |
---|
280 | rx_free_nfa_edge (edge); |
---|
281 | return 0; |
---|
282 | } |
---|
283 | RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]); |
---|
284 | return 1; |
---|
285 | } |
---|
286 | else |
---|
287 | { |
---|
288 | struct rexp_node copied; |
---|
289 | struct rx_nfa_state * shared; |
---|
290 | |
---|
291 | copied = *rexp; |
---|
292 | shared = 0; |
---|
293 | copied.params.cstr.len--; |
---|
294 | copied.params.cstr.contents++; |
---|
295 | if (!rx_build_nfa (rx, &copied, &shared, end)) |
---|
296 | return 0; |
---|
297 | copied.params.cstr.len = 1; |
---|
298 | copied.params.cstr.contents--; |
---|
299 | return rx_build_nfa (rx, &copied, start, &shared); |
---|
300 | } |
---|
301 | } |
---|
302 | |
---|
303 | case r_opt: |
---|
304 | return (rx_build_nfa (rx, rexp->params.pair.left, start, end) |
---|
305 | && rx_nfa_edge (rx, ne_epsilon, *start, *end)); |
---|
306 | |
---|
307 | case r_plus: |
---|
308 | { |
---|
309 | struct rx_nfa_state * star_start = 0; |
---|
310 | struct rx_nfa_state * star_end = 0; |
---|
311 | struct rx_nfa_state * shared; |
---|
312 | |
---|
313 | shared = 0; |
---|
314 | if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared)) |
---|
315 | return 0; |
---|
316 | return (rx_build_nfa (rx, rexp->params.pair.left, |
---|
317 | &star_start, &star_end) |
---|
318 | && star_start |
---|
319 | && star_end |
---|
320 | && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) |
---|
321 | && rx_nfa_edge (rx, ne_epsilon, shared, star_start) |
---|
322 | && rx_nfa_edge (rx, ne_epsilon, star_end, *end) |
---|
323 | && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); |
---|
324 | } |
---|
325 | |
---|
326 | case r_interval: |
---|
327 | case r_star: |
---|
328 | { |
---|
329 | struct rx_nfa_state * star_start = 0; |
---|
330 | struct rx_nfa_state * star_end = 0; |
---|
331 | return (rx_build_nfa (rx, rexp->params.pair.left, |
---|
332 | &star_start, &star_end) |
---|
333 | && star_start |
---|
334 | && star_end |
---|
335 | && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) |
---|
336 | && rx_nfa_edge (rx, ne_epsilon, *start, star_start) |
---|
337 | && rx_nfa_edge (rx, ne_epsilon, star_end, *end) |
---|
338 | |
---|
339 | && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); |
---|
340 | } |
---|
341 | |
---|
342 | case r_cut: |
---|
343 | { |
---|
344 | struct rx_nfa_state * cut_end = 0; |
---|
345 | |
---|
346 | cut_end = rx_nfa_state (rx); |
---|
347 | if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end))) |
---|
348 | { |
---|
349 | rx_free_nfa_state (*start); |
---|
350 | rx_free_nfa_state (*end); |
---|
351 | if (cut_end) |
---|
352 | rx_free_nfa_state (cut_end); |
---|
353 | return 0; |
---|
354 | } |
---|
355 | cut_end->is_final = rexp->params.intval; |
---|
356 | return 1; |
---|
357 | } |
---|
358 | |
---|
359 | case r_parens: |
---|
360 | return rx_build_nfa (rx, rexp->params.pair.left, start, end); |
---|
361 | |
---|
362 | case r_concat: |
---|
363 | { |
---|
364 | struct rx_nfa_state *shared = 0; |
---|
365 | return |
---|
366 | (rx_build_nfa (rx, rexp->params.pair.left, start, &shared) |
---|
367 | && rx_build_nfa (rx, rexp->params.pair.right, &shared, end)); |
---|
368 | } |
---|
369 | |
---|
370 | case r_alternate: |
---|
371 | { |
---|
372 | struct rx_nfa_state *ls = 0; |
---|
373 | struct rx_nfa_state *le = 0; |
---|
374 | struct rx_nfa_state *rs = 0; |
---|
375 | struct rx_nfa_state *re = 0; |
---|
376 | return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le) |
---|
377 | && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re) |
---|
378 | && rx_nfa_edge (rx, ne_epsilon, *start, ls) |
---|
379 | && rx_nfa_edge (rx, ne_epsilon, *start, rs) |
---|
380 | && rx_nfa_edge (rx, ne_epsilon, le, *end) |
---|
381 | && rx_nfa_edge (rx, ne_epsilon, re, *end)); |
---|
382 | } |
---|
383 | |
---|
384 | case r_context: |
---|
385 | edge = rx_nfa_edge (rx, ne_side_effect, *start, *end); |
---|
386 | if (!edge) |
---|
387 | return 0; |
---|
388 | edge->params.side_effect = (void *)rexp->params.intval; |
---|
389 | return 1; |
---|
390 | } |
---|
391 | |
---|
392 | /* this should never happen */ |
---|
393 | return 0; |
---|
394 | } |
---|
395 | |
---|
396 | |
---|
397 | /* {Low Level Data Structures for the Static NFA->SuperNFA Analysis} |
---|
398 | * |
---|
399 | * There are side effect lists -- lists of side effects occuring |
---|
400 | * along an uninterrupted, acyclic path of side-effect epsilon edges. |
---|
401 | * Such paths are collapsed to single edges in the course of computing |
---|
402 | * epsilon closures. The resulting single edges are labled with a list |
---|
403 | * of all the side effects from the original multi-edge path. Equivalent |
---|
404 | * lists of side effects are made == by the constructors below. |
---|
405 | * |
---|
406 | * There are also nfa state sets. These are used to hold a list of all |
---|
407 | * states reachable from a starting state for a given type of transition |
---|
408 | * and side effect list. These are also hash-consed. |
---|
409 | */ |
---|
410 | |
---|
411 | |
---|
412 | |
---|
413 | /* The next several functions compare, construct, etc. lists of side |
---|
414 | * effects. See ECLOSE_NFA (below) for details. |
---|
415 | */ |
---|
416 | |
---|
417 | /* Ordering of rx_se_list |
---|
418 | * (-1, 0, 1 return value convention). |
---|
419 | */ |
---|
420 | |
---|
421 | #ifdef __STDC__ |
---|
422 | static int |
---|
423 | se_list_cmp (void * va, void * vb) |
---|
424 | #else |
---|
425 | static int |
---|
426 | se_list_cmp (va, vb) |
---|
427 | void * va; |
---|
428 | void * vb; |
---|
429 | #endif |
---|
430 | { |
---|
431 | struct rx_se_list * a = (struct rx_se_list *)va; |
---|
432 | struct rx_se_list * b = (struct rx_se_list *)vb; |
---|
433 | |
---|
434 | return ((va == vb) |
---|
435 | ? 0 |
---|
436 | : (!va |
---|
437 | ? -1 |
---|
438 | : (!vb |
---|
439 | ? 1 |
---|
440 | : ((long)a->car < (long)b->car |
---|
441 | ? 1 |
---|
442 | : ((long)a->car > (long)b->car |
---|
443 | ? -1 |
---|
444 | : se_list_cmp ((void *)a->cdr, (void *)b->cdr)))))); |
---|
445 | } |
---|
446 | |
---|
447 | |
---|
448 | #ifdef __STDC__ |
---|
449 | static int |
---|
450 | se_list_equal (void * va, void * vb) |
---|
451 | #else |
---|
452 | static int |
---|
453 | se_list_equal (va, vb) |
---|
454 | void * va; |
---|
455 | void * vb; |
---|
456 | #endif |
---|
457 | { |
---|
458 | return !(se_list_cmp (va, vb)); |
---|
459 | } |
---|
460 | |
---|
461 | static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 }; |
---|
462 | |
---|
463 | |
---|
464 | #ifdef __STDC__ |
---|
465 | static struct rx_se_list * |
---|
466 | side_effect_cons (struct rx * rx, |
---|
467 | void * se, struct rx_se_list * list) |
---|
468 | #else |
---|
469 | static struct rx_se_list * |
---|
470 | side_effect_cons (rx, se, list) |
---|
471 | struct rx * rx; |
---|
472 | void * se; |
---|
473 | struct rx_se_list * list; |
---|
474 | #endif |
---|
475 | { |
---|
476 | struct rx_se_list * l; |
---|
477 | l = ((struct rx_se_list *) malloc (sizeof (*l))); |
---|
478 | if (!l) |
---|
479 | return 0; |
---|
480 | l->car = se; |
---|
481 | l->cdr = list; |
---|
482 | return l; |
---|
483 | } |
---|
484 | |
---|
485 | |
---|
486 | #ifdef __STDC__ |
---|
487 | static struct rx_se_list * |
---|
488 | hash_cons_se_prog (struct rx * rx, |
---|
489 | struct rx_hash * memo, |
---|
490 | void * car, struct rx_se_list * cdr) |
---|
491 | #else |
---|
492 | static struct rx_se_list * |
---|
493 | hash_cons_se_prog (rx, memo, car, cdr) |
---|
494 | struct rx * rx; |
---|
495 | struct rx_hash * memo; |
---|
496 | void * car; |
---|
497 | struct rx_se_list * cdr; |
---|
498 | #endif |
---|
499 | { |
---|
500 | long hash = (long)car ^ (long)cdr; |
---|
501 | struct rx_se_list template; |
---|
502 | |
---|
503 | template.car = car; |
---|
504 | template.cdr = cdr; |
---|
505 | { |
---|
506 | struct rx_hash_item * it = rx_hash_store (memo, hash, |
---|
507 | (void *)&template, |
---|
508 | &se_list_hash_rules); |
---|
509 | if (!it) |
---|
510 | return 0; |
---|
511 | if (it->data == (void *)&template) |
---|
512 | { |
---|
513 | struct rx_se_list * consed; |
---|
514 | consed = (struct rx_se_list *) malloc (sizeof (*consed)); |
---|
515 | *consed = template; |
---|
516 | it->data = (void *)consed; |
---|
517 | } |
---|
518 | return (struct rx_se_list *)it->data; |
---|
519 | } |
---|
520 | } |
---|
521 | |
---|
522 | |
---|
523 | #ifdef __STDC__ |
---|
524 | static struct rx_se_list * |
---|
525 | hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog) |
---|
526 | #else |
---|
527 | static struct rx_se_list * |
---|
528 | hash_se_prog (rx, memo, prog) |
---|
529 | struct rx * rx; |
---|
530 | struct rx_hash * memo; |
---|
531 | struct rx_se_list * prog; |
---|
532 | #endif |
---|
533 | { |
---|
534 | struct rx_se_list * answer = 0; |
---|
535 | while (prog) |
---|
536 | { |
---|
537 | answer = hash_cons_se_prog (rx, memo, prog->car, answer); |
---|
538 | if (!answer) |
---|
539 | return 0; |
---|
540 | prog = prog->cdr; |
---|
541 | } |
---|
542 | return answer; |
---|
543 | } |
---|
544 | |
---|
545 | |
---|
546 | |
---|
547 | /* {Constructors, etc. for NFA State Sets} |
---|
548 | */ |
---|
549 | |
---|
550 | #ifdef __STDC__ |
---|
551 | static int |
---|
552 | nfa_set_cmp (void * va, void * vb) |
---|
553 | #else |
---|
554 | static int |
---|
555 | nfa_set_cmp (va, vb) |
---|
556 | void * va; |
---|
557 | void * vb; |
---|
558 | #endif |
---|
559 | { |
---|
560 | struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va; |
---|
561 | struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb; |
---|
562 | |
---|
563 | return ((va == vb) |
---|
564 | ? 0 |
---|
565 | : (!va |
---|
566 | ? -1 |
---|
567 | : (!vb |
---|
568 | ? 1 |
---|
569 | : (a->car->id < b->car->id |
---|
570 | ? 1 |
---|
571 | : (a->car->id > b->car->id |
---|
572 | ? -1 |
---|
573 | : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr)))))); |
---|
574 | } |
---|
575 | |
---|
576 | #ifdef __STDC__ |
---|
577 | static int |
---|
578 | nfa_set_equal (void * va, void * vb) |
---|
579 | #else |
---|
580 | static int |
---|
581 | nfa_set_equal (va, vb) |
---|
582 | void * va; |
---|
583 | void * vb; |
---|
584 | #endif |
---|
585 | { |
---|
586 | return !nfa_set_cmp (va, vb); |
---|
587 | } |
---|
588 | |
---|
589 | static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 }; |
---|
590 | |
---|
591 | |
---|
592 | #ifdef __STDC__ |
---|
593 | static struct rx_nfa_state_set * |
---|
594 | nfa_set_cons (struct rx * rx, |
---|
595 | struct rx_hash * memo, struct rx_nfa_state * state, |
---|
596 | struct rx_nfa_state_set * set) |
---|
597 | #else |
---|
598 | static struct rx_nfa_state_set * |
---|
599 | nfa_set_cons (rx, memo, state, set) |
---|
600 | struct rx * rx; |
---|
601 | struct rx_hash * memo; |
---|
602 | struct rx_nfa_state * state; |
---|
603 | struct rx_nfa_state_set * set; |
---|
604 | #endif |
---|
605 | { |
---|
606 | struct rx_nfa_state_set template; |
---|
607 | struct rx_hash_item * node; |
---|
608 | template.car = state; |
---|
609 | template.cdr = set; |
---|
610 | node = rx_hash_store (memo, |
---|
611 | (((long)state) >> 8) ^ (long)set, |
---|
612 | &template, &nfa_set_hash_rules); |
---|
613 | if (!node) |
---|
614 | return 0; |
---|
615 | if (node->data == &template) |
---|
616 | { |
---|
617 | struct rx_nfa_state_set * l; |
---|
618 | l = (struct rx_nfa_state_set *) malloc (sizeof (*l)); |
---|
619 | node->data = (void *) l; |
---|
620 | if (!l) |
---|
621 | return 0; |
---|
622 | *l = template; |
---|
623 | } |
---|
624 | return (struct rx_nfa_state_set *)node->data; |
---|
625 | } |
---|
626 | |
---|
627 | |
---|
628 | #ifdef __STDC__ |
---|
629 | static struct rx_nfa_state_set * |
---|
630 | nfa_set_enjoin (struct rx * rx, |
---|
631 | struct rx_hash * memo, struct rx_nfa_state * state, |
---|
632 | struct rx_nfa_state_set * set) |
---|
633 | #else |
---|
634 | static struct rx_nfa_state_set * |
---|
635 | nfa_set_enjoin (rx, memo, state, set) |
---|
636 | struct rx * rx; |
---|
637 | struct rx_hash * memo; |
---|
638 | struct rx_nfa_state * state; |
---|
639 | struct rx_nfa_state_set * set; |
---|
640 | #endif |
---|
641 | { |
---|
642 | if (!set || (state->id < set->car->id)) |
---|
643 | return nfa_set_cons (rx, memo, state, set); |
---|
644 | if (state->id == set->car->id) |
---|
645 | return set; |
---|
646 | else |
---|
647 | { |
---|
648 | struct rx_nfa_state_set * newcdr |
---|
649 | = nfa_set_enjoin (rx, memo, state, set->cdr); |
---|
650 | if (newcdr != set->cdr) |
---|
651 | set = nfa_set_cons (rx, memo, set->car, newcdr); |
---|
652 | return set; |
---|
653 | } |
---|
654 | } |
---|
655 | |
---|
656 | |
---|
657 | /* {Computing Epsilon/Side-effect Closures.} |
---|
658 | */ |
---|
659 | |
---|
660 | struct eclose_frame |
---|
661 | { |
---|
662 | struct rx_se_list *prog_backwards; |
---|
663 | }; |
---|
664 | |
---|
665 | |
---|
666 | /* This is called while computing closures for "outnode". |
---|
667 | * The current node in the traversal is "node". |
---|
668 | * "frame" contains a list of a all side effects between |
---|
669 | * "outnode" and "node" from most to least recent. |
---|
670 | * |
---|
671 | * Explores forward from "node", adding new possible |
---|
672 | * futures to outnode. |
---|
673 | * |
---|
674 | * Returns 0 on allocation failure. |
---|
675 | */ |
---|
676 | |
---|
677 | #ifdef __STDC__ |
---|
678 | static int |
---|
679 | eclose_node (struct rx *rx, struct rx_nfa_state *outnode, |
---|
680 | struct rx_nfa_state *node, struct eclose_frame *frame) |
---|
681 | #else |
---|
682 | static int |
---|
683 | eclose_node (rx, outnode, node, frame) |
---|
684 | struct rx *rx; |
---|
685 | struct rx_nfa_state *outnode; |
---|
686 | struct rx_nfa_state *node; |
---|
687 | struct eclose_frame *frame; |
---|
688 | #endif |
---|
689 | { |
---|
690 | struct rx_nfa_edge *e = node->edges; |
---|
691 | |
---|
692 | /* For each node, we follow all epsilon paths to build the closure. |
---|
693 | * The closure omits nodes that have only epsilon edges. |
---|
694 | * The closure is split into partial closures -- all the states in |
---|
695 | * a partial closure are reached by crossing the same list of |
---|
696 | * of side effects (though not necessarily the same path). |
---|
697 | */ |
---|
698 | if (node->mark) |
---|
699 | return 1; |
---|
700 | node->mark = 1; |
---|
701 | |
---|
702 | /* If "node" has more than just epsilon and |
---|
703 | * and side-effect transitions (id >= 0), or is final, |
---|
704 | * then it has to be added to the possible futures |
---|
705 | * of "outnode". |
---|
706 | */ |
---|
707 | if (node->id >= 0 || node->is_final) |
---|
708 | { |
---|
709 | struct rx_possible_future **ec; |
---|
710 | struct rx_se_list * prog_in_order; |
---|
711 | int cmp; |
---|
712 | |
---|
713 | prog_in_order = ((struct rx_se_list *)hash_se_prog (rx, |
---|
714 | &rx->se_list_memo, |
---|
715 | frame->prog_backwards)); |
---|
716 | |
---|
717 | ec = &outnode->futures; |
---|
718 | |
---|
719 | while (*ec) |
---|
720 | { |
---|
721 | cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order); |
---|
722 | if (cmp <= 0) |
---|
723 | break; |
---|
724 | ec = &(*ec)->next; |
---|
725 | } |
---|
726 | |
---|
727 | if (!*ec || (cmp < 0)) |
---|
728 | { |
---|
729 | struct rx_possible_future * pf; |
---|
730 | pf = rx_possible_future (rx, prog_in_order); |
---|
731 | if (!pf) |
---|
732 | return 0; |
---|
733 | pf->next = *ec; |
---|
734 | *ec = pf; |
---|
735 | } |
---|
736 | if (node->id >= 0) |
---|
737 | { |
---|
738 | (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo, |
---|
739 | node, (*ec)->destset); |
---|
740 | if (!(*ec)->destset) |
---|
741 | return 0; |
---|
742 | } |
---|
743 | } |
---|
744 | |
---|
745 | /* Recurse on outgoing epsilon and side effect nodes. |
---|
746 | */ |
---|
747 | while (e) |
---|
748 | { |
---|
749 | switch (e->type) |
---|
750 | { |
---|
751 | case ne_epsilon: |
---|
752 | if (!eclose_node (rx, outnode, e->dest, frame)) |
---|
753 | return 0; |
---|
754 | break; |
---|
755 | case ne_side_effect: |
---|
756 | { |
---|
757 | frame->prog_backwards = side_effect_cons (rx, |
---|
758 | e->params.side_effect, |
---|
759 | frame->prog_backwards); |
---|
760 | if (!frame->prog_backwards) |
---|
761 | return 0; |
---|
762 | if (!eclose_node (rx, outnode, e->dest, frame)) |
---|
763 | return 0; |
---|
764 | { |
---|
765 | struct rx_se_list * dying = frame->prog_backwards; |
---|
766 | frame->prog_backwards = frame->prog_backwards->cdr; |
---|
767 | free ((char *)dying); |
---|
768 | } |
---|
769 | break; |
---|
770 | } |
---|
771 | default: |
---|
772 | break; |
---|
773 | } |
---|
774 | e = e->next; |
---|
775 | } |
---|
776 | node->mark = 0; |
---|
777 | return 1; |
---|
778 | } |
---|
779 | |
---|
780 | |
---|
781 | #ifdef __STDC__ |
---|
782 | struct rx_possible_future * |
---|
783 | rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n) |
---|
784 | #else |
---|
785 | struct rx_possible_future * |
---|
786 | rx_state_possible_futures (rx, n) |
---|
787 | struct rx * rx; |
---|
788 | struct rx_nfa_state * n; |
---|
789 | #endif |
---|
790 | { |
---|
791 | if (n->futures_computed) |
---|
792 | return n->futures; |
---|
793 | else |
---|
794 | { |
---|
795 | struct eclose_frame frame; |
---|
796 | frame.prog_backwards = 0; |
---|
797 | if (!eclose_node (rx, n, n, &frame)) |
---|
798 | return 0; |
---|
799 | else |
---|
800 | { |
---|
801 | n->futures_computed = 1; |
---|
802 | return n->futures; |
---|
803 | } |
---|
804 | } |
---|
805 | } |
---|
806 | |
---|
807 | |
---|
808 | |
---|
809 | /* {Storing the NFA in a Contiguous Region of Memory} |
---|
810 | */ |
---|
811 | |
---|
812 | |
---|
813 | |
---|
814 | #ifdef __STDC__ |
---|
815 | static void |
---|
816 | se_memo_freer (struct rx_hash_item * node) |
---|
817 | #else |
---|
818 | static void |
---|
819 | se_memo_freer (node) |
---|
820 | struct rx_hash_item * node; |
---|
821 | #endif |
---|
822 | { |
---|
823 | free ((char *)node->data); |
---|
824 | } |
---|
825 | |
---|
826 | |
---|
827 | #ifdef __STDC__ |
---|
828 | static void |
---|
829 | nfa_set_freer (struct rx_hash_item * node) |
---|
830 | #else |
---|
831 | static void |
---|
832 | nfa_set_freer (node) |
---|
833 | struct rx_hash_item * node; |
---|
834 | #endif |
---|
835 | { |
---|
836 | free ((char *)node->data); |
---|
837 | } |
---|
838 | |
---|
839 | #ifdef __STDC__ |
---|
840 | void |
---|
841 | rx_free_nfa (struct rx *rx) |
---|
842 | #else |
---|
843 | void |
---|
844 | rx_free_nfa (rx) |
---|
845 | struct rx *rx; |
---|
846 | #endif |
---|
847 | { |
---|
848 | rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules); |
---|
849 | rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo)); |
---|
850 | rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules); |
---|
851 | rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo)); |
---|
852 | rx_free_nfa_graph (rx); |
---|
853 | } |
---|