[10429] | 1 | /* Copyright (C) 1995, 1996 Tom Lord |
---|
| 2 | * |
---|
| 3 | * This program is free software; you can redistribute it and/or modify |
---|
| 4 | * it under the terms of the GNU Library General Public License as published by |
---|
| 5 | * the Free Software Foundation; either version 2, or (at your option) |
---|
| 6 | * any later version. |
---|
| 7 | * |
---|
| 8 | * This program is distributed in the hope that it will be useful, |
---|
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 11 | * GNU Library General Public License for more details. |
---|
| 12 | * |
---|
| 13 | * You should have received a copy of the GNU Library General Public License |
---|
| 14 | * along with this software; see the file COPYING. If not, write to |
---|
| 15 | * the Free Software Foundation, 59 Temple Place - Suite 330, |
---|
| 16 | * Boston, MA 02111-1307, USA. |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | |
---|
| 20 | /* |
---|
| 21 | * Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu) |
---|
| 22 | */ |
---|
| 23 | |
---|
| 24 | |
---|
| 25 | #include "rxall.h" |
---|
| 26 | #include "rxnfa.h" |
---|
| 27 | |
---|
| 28 | |
---|
| 29 | |
---|
| 30 | #define remalloc(M, S) (M ? realloc (M, S) : malloc (S)) |
---|
| 31 | |
---|
| 32 | |
---|
| 33 | /* {Low Level Data Structure Code} |
---|
| 34 | */ |
---|
| 35 | |
---|
| 36 | /* Constructs a new nfa node. */ |
---|
| 37 | #ifdef __STDC__ |
---|
| 38 | struct rx_nfa_state * |
---|
| 39 | rx_nfa_state (struct rx *rx) |
---|
| 40 | #else |
---|
| 41 | struct rx_nfa_state * |
---|
| 42 | rx_nfa_state (rx) |
---|
| 43 | struct rx *rx; |
---|
| 44 | #endif |
---|
| 45 | { |
---|
| 46 | struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n)); |
---|
| 47 | if (!n) |
---|
| 48 | return 0; |
---|
| 49 | rx_bzero ((char *)n, sizeof (*n)); |
---|
| 50 | n->next = rx->nfa_states; |
---|
| 51 | rx->nfa_states = n; |
---|
| 52 | return n; |
---|
| 53 | } |
---|
| 54 | |
---|
| 55 | |
---|
| 56 | #ifdef __STDC__ |
---|
| 57 | static void |
---|
| 58 | rx_free_nfa_state (struct rx_nfa_state * n) |
---|
| 59 | #else |
---|
| 60 | static void |
---|
| 61 | rx_free_nfa_state (n) |
---|
| 62 | struct rx_nfa_state * n; |
---|
| 63 | #endif |
---|
| 64 | { |
---|
| 65 | free ((char *)n); |
---|
| 66 | } |
---|
| 67 | |
---|
| 68 | |
---|
| 69 | /* This adds an edge between two nodes, but doesn't initialize the |
---|
| 70 | * edge label. |
---|
| 71 | */ |
---|
| 72 | |
---|
| 73 | #ifdef __STDC__ |
---|
| 74 | struct rx_nfa_edge * |
---|
| 75 | rx_nfa_edge (struct rx *rx, |
---|
| 76 | enum rx_nfa_etype type, |
---|
| 77 | struct rx_nfa_state *start, |
---|
| 78 | struct rx_nfa_state *dest) |
---|
| 79 | #else |
---|
| 80 | struct rx_nfa_edge * |
---|
| 81 | rx_nfa_edge (rx, type, start, dest) |
---|
| 82 | struct rx *rx; |
---|
| 83 | enum rx_nfa_etype type; |
---|
| 84 | struct rx_nfa_state *start; |
---|
| 85 | struct rx_nfa_state *dest; |
---|
| 86 | #endif |
---|
| 87 | { |
---|
| 88 | struct rx_nfa_edge *e; |
---|
| 89 | e = (struct rx_nfa_edge *)malloc (sizeof (*e)); |
---|
| 90 | if (!e) |
---|
| 91 | return 0; |
---|
| 92 | e->next = start->edges; |
---|
| 93 | start->edges = e; |
---|
| 94 | e->type = type; |
---|
| 95 | e->dest = dest; |
---|
| 96 | return e; |
---|
| 97 | } |
---|
| 98 | |
---|
| 99 | |
---|
| 100 | #ifdef __STDC__ |
---|
| 101 | static void |
---|
| 102 | rx_free_nfa_edge (struct rx_nfa_edge * e) |
---|
| 103 | #else |
---|
| 104 | static void |
---|
| 105 | rx_free_nfa_edge (e) |
---|
| 106 | struct rx_nfa_edge * e; |
---|
| 107 | #endif |
---|
| 108 | { |
---|
| 109 | free ((char *)e); |
---|
| 110 | } |
---|
| 111 | |
---|
| 112 | |
---|
| 113 | /* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure |
---|
| 114 | * of an NFA. These are added to an nfa automaticly by eclose_nfa. |
---|
| 115 | */ |
---|
| 116 | |
---|
| 117 | #ifdef __STDC__ |
---|
| 118 | static struct rx_possible_future * |
---|
| 119 | rx_possible_future (struct rx * rx, |
---|
| 120 | struct rx_se_list * effects) |
---|
| 121 | #else |
---|
| 122 | static struct rx_possible_future * |
---|
| 123 | rx_possible_future (rx, effects) |
---|
| 124 | struct rx * rx; |
---|
| 125 | struct rx_se_list * effects; |
---|
| 126 | #endif |
---|
| 127 | { |
---|
| 128 | struct rx_possible_future *ec; |
---|
| 129 | ec = (struct rx_possible_future *) malloc (sizeof (*ec)); |
---|
| 130 | if (!ec) |
---|
| 131 | return 0; |
---|
| 132 | ec->destset = 0; |
---|
| 133 | ec->next = 0; |
---|
| 134 | ec->effects = effects; |
---|
| 135 | return ec; |
---|
| 136 | } |
---|
| 137 | |
---|
| 138 | |
---|
| 139 | #ifdef __STDC__ |
---|
| 140 | static void |
---|
| 141 | rx_free_possible_future (struct rx_possible_future * pf) |
---|
| 142 | #else |
---|
| 143 | static void |
---|
| 144 | rx_free_possible_future (pf) |
---|
| 145 | struct rx_possible_future * pf; |
---|
| 146 | #endif |
---|
| 147 | { |
---|
| 148 | free ((char *)pf); |
---|
| 149 | } |
---|
| 150 | |
---|
| 151 | |
---|
| 152 | #ifdef __STDC__ |
---|
| 153 | static void |
---|
| 154 | rx_free_nfa_graph (struct rx *rx) |
---|
| 155 | #else |
---|
| 156 | static void |
---|
| 157 | rx_free_nfa_graph (rx) |
---|
| 158 | struct rx *rx; |
---|
| 159 | #endif |
---|
| 160 | { |
---|
| 161 | while (rx->nfa_states) |
---|
| 162 | { |
---|
| 163 | while (rx->nfa_states->edges) |
---|
| 164 | { |
---|
| 165 | switch (rx->nfa_states->edges->type) |
---|
| 166 | { |
---|
| 167 | case ne_cset: |
---|
| 168 | rx_free_cset (rx->nfa_states->edges->params.cset); |
---|
| 169 | break; |
---|
| 170 | default: |
---|
| 171 | break; |
---|
| 172 | } |
---|
| 173 | { |
---|
| 174 | struct rx_nfa_edge * e; |
---|
| 175 | e = rx->nfa_states->edges; |
---|
| 176 | rx->nfa_states->edges = rx->nfa_states->edges->next; |
---|
| 177 | rx_free_nfa_edge (e); |
---|
| 178 | } |
---|
| 179 | } /* while (rx->nfa_states->edges) */ |
---|
| 180 | { |
---|
| 181 | /* Iterate over the partial epsilon closures of rx->nfa_states */ |
---|
| 182 | struct rx_possible_future * pf = rx->nfa_states->futures; |
---|
| 183 | while (pf) |
---|
| 184 | { |
---|
| 185 | struct rx_possible_future * pft = pf; |
---|
| 186 | pf = pf->next; |
---|
| 187 | rx_free_possible_future (pft); |
---|
| 188 | } |
---|
| 189 | } |
---|
| 190 | { |
---|
| 191 | struct rx_nfa_state *n; |
---|
| 192 | n = rx->nfa_states; |
---|
| 193 | rx->nfa_states = rx->nfa_states->next; |
---|
| 194 | rx_free_nfa_state (n); |
---|
| 195 | } |
---|
| 196 | } |
---|
| 197 | } |
---|
| 198 | |
---|
| 199 | |
---|
| 200 | |
---|
| 201 | /* {Translating a Syntax Tree into an NFA} |
---|
| 202 | * |
---|
| 203 | */ |
---|
| 204 | |
---|
| 205 | |
---|
| 206 | /* This is the Thompson regexp->nfa algorithm. |
---|
| 207 | * It is modified to allow for `side-effect epsilons.' Those are |
---|
| 208 | * edges that are taken whenever a similarly placed epsilon edge |
---|
| 209 | * would be, but which also imply that some side effect occurs |
---|
| 210 | * when the edge is taken. |
---|
| 211 | * |
---|
| 212 | * Side effects are used to model parts of the pattern langauge |
---|
| 213 | * that are not regular. |
---|
| 214 | */ |
---|
| 215 | |
---|
| 216 | #ifdef __STDC__ |
---|
| 217 | int |
---|
| 218 | rx_build_nfa (struct rx *rx, |
---|
| 219 | struct rexp_node *rexp, |
---|
| 220 | struct rx_nfa_state **start, |
---|
| 221 | struct rx_nfa_state **end) |
---|
| 222 | #else |
---|
| 223 | int |
---|
| 224 | rx_build_nfa (rx, rexp, start, end) |
---|
| 225 | struct rx *rx; |
---|
| 226 | struct rexp_node *rexp; |
---|
| 227 | struct rx_nfa_state **start; |
---|
| 228 | struct rx_nfa_state **end; |
---|
| 229 | #endif |
---|
| 230 | { |
---|
| 231 | struct rx_nfa_edge *edge; |
---|
| 232 | |
---|
| 233 | /* Start & end nodes may have been allocated by the caller. */ |
---|
| 234 | *start = *start ? *start : rx_nfa_state (rx); |
---|
| 235 | |
---|
| 236 | if (!*start) |
---|
| 237 | return 0; |
---|
| 238 | |
---|
| 239 | if (!rexp) |
---|
| 240 | { |
---|
| 241 | *end = *start; |
---|
| 242 | return 1; |
---|
| 243 | } |
---|
| 244 | |
---|
| 245 | *end = *end ? *end : rx_nfa_state (rx); |
---|
| 246 | |
---|
| 247 | if (!*end) |
---|
| 248 | { |
---|
| 249 | rx_free_nfa_state (*start); |
---|
| 250 | return 0; |
---|
| 251 | } |
---|
| 252 | |
---|
| 253 | switch (rexp->type) |
---|
| 254 | { |
---|
| 255 | case r_cset: |
---|
| 256 | edge = rx_nfa_edge (rx, ne_cset, *start, *end); |
---|
| 257 | (*start)->has_cset_edges = 1; |
---|
| 258 | if (!edge) |
---|
| 259 | return 0; |
---|
| 260 | edge->params.cset = rx_copy_cset (rx->local_cset_size, |
---|
| 261 | rexp->params.cset); |
---|
| 262 | if (!edge->params.cset) |
---|
| 263 | { |
---|
| 264 | rx_free_nfa_edge (edge); |
---|
| 265 | return 0; |
---|
| 266 | } |
---|
| 267 | return 1; |
---|
| 268 | |
---|
| 269 | case r_string: |
---|
| 270 | { |
---|
| 271 | if (rexp->params.cstr.len == 1) |
---|
| 272 | { |
---|
| 273 | edge = rx_nfa_edge (rx, ne_cset, *start, *end); |
---|
| 274 | (*start)->has_cset_edges = 1; |
---|
| 275 | if (!edge) |
---|
| 276 | return 0; |
---|
| 277 | edge->params.cset = rx_cset (rx->local_cset_size); |
---|
| 278 | if (!edge->params.cset) |
---|
| 279 | { |
---|
| 280 | rx_free_nfa_edge (edge); |
---|
| 281 | return 0; |
---|
| 282 | } |
---|
| 283 | RX_bitset_enjoin (edge->params.cset, rexp->params.cstr.contents[0]); |
---|
| 284 | return 1; |
---|
| 285 | } |
---|
| 286 | else |
---|
| 287 | { |
---|
| 288 | struct rexp_node copied; |
---|
| 289 | struct rx_nfa_state * shared; |
---|
| 290 | |
---|
| 291 | copied = *rexp; |
---|
| 292 | shared = 0; |
---|
| 293 | copied.params.cstr.len--; |
---|
| 294 | copied.params.cstr.contents++; |
---|
| 295 | if (!rx_build_nfa (rx, &copied, &shared, end)) |
---|
| 296 | return 0; |
---|
| 297 | copied.params.cstr.len = 1; |
---|
| 298 | copied.params.cstr.contents--; |
---|
| 299 | return rx_build_nfa (rx, &copied, start, &shared); |
---|
| 300 | } |
---|
| 301 | } |
---|
| 302 | |
---|
| 303 | case r_opt: |
---|
| 304 | return (rx_build_nfa (rx, rexp->params.pair.left, start, end) |
---|
| 305 | && rx_nfa_edge (rx, ne_epsilon, *start, *end)); |
---|
| 306 | |
---|
| 307 | case r_plus: |
---|
| 308 | { |
---|
| 309 | struct rx_nfa_state * star_start = 0; |
---|
| 310 | struct rx_nfa_state * star_end = 0; |
---|
| 311 | struct rx_nfa_state * shared; |
---|
| 312 | |
---|
| 313 | shared = 0; |
---|
| 314 | if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared)) |
---|
| 315 | return 0; |
---|
| 316 | return (rx_build_nfa (rx, rexp->params.pair.left, |
---|
| 317 | &star_start, &star_end) |
---|
| 318 | && star_start |
---|
| 319 | && star_end |
---|
| 320 | && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) |
---|
| 321 | && rx_nfa_edge (rx, ne_epsilon, shared, star_start) |
---|
| 322 | && rx_nfa_edge (rx, ne_epsilon, star_end, *end) |
---|
| 323 | && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); |
---|
| 324 | } |
---|
| 325 | |
---|
| 326 | case r_interval: |
---|
| 327 | case r_star: |
---|
| 328 | { |
---|
| 329 | struct rx_nfa_state * star_start = 0; |
---|
| 330 | struct rx_nfa_state * star_end = 0; |
---|
| 331 | return (rx_build_nfa (rx, rexp->params.pair.left, |
---|
| 332 | &star_start, &star_end) |
---|
| 333 | && star_start |
---|
| 334 | && star_end |
---|
| 335 | && rx_nfa_edge (rx, ne_epsilon, star_start, star_end) |
---|
| 336 | && rx_nfa_edge (rx, ne_epsilon, *start, star_start) |
---|
| 337 | && rx_nfa_edge (rx, ne_epsilon, star_end, *end) |
---|
| 338 | |
---|
| 339 | && rx_nfa_edge (rx, ne_epsilon, star_end, star_start)); |
---|
| 340 | } |
---|
| 341 | |
---|
| 342 | case r_cut: |
---|
| 343 | { |
---|
| 344 | struct rx_nfa_state * cut_end = 0; |
---|
| 345 | |
---|
| 346 | cut_end = rx_nfa_state (rx); |
---|
| 347 | if (!(cut_end && rx_nfa_edge (rx, ne_epsilon, *start, cut_end))) |
---|
| 348 | { |
---|
| 349 | rx_free_nfa_state (*start); |
---|
| 350 | rx_free_nfa_state (*end); |
---|
| 351 | if (cut_end) |
---|
| 352 | rx_free_nfa_state (cut_end); |
---|
| 353 | return 0; |
---|
| 354 | } |
---|
| 355 | cut_end->is_final = rexp->params.intval; |
---|
| 356 | return 1; |
---|
| 357 | } |
---|
| 358 | |
---|
| 359 | case r_parens: |
---|
| 360 | return rx_build_nfa (rx, rexp->params.pair.left, start, end); |
---|
| 361 | |
---|
| 362 | case r_concat: |
---|
| 363 | { |
---|
| 364 | struct rx_nfa_state *shared = 0; |
---|
| 365 | return |
---|
| 366 | (rx_build_nfa (rx, rexp->params.pair.left, start, &shared) |
---|
| 367 | && rx_build_nfa (rx, rexp->params.pair.right, &shared, end)); |
---|
| 368 | } |
---|
| 369 | |
---|
| 370 | case r_alternate: |
---|
| 371 | { |
---|
| 372 | struct rx_nfa_state *ls = 0; |
---|
| 373 | struct rx_nfa_state *le = 0; |
---|
| 374 | struct rx_nfa_state *rs = 0; |
---|
| 375 | struct rx_nfa_state *re = 0; |
---|
| 376 | return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le) |
---|
| 377 | && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re) |
---|
| 378 | && rx_nfa_edge (rx, ne_epsilon, *start, ls) |
---|
| 379 | && rx_nfa_edge (rx, ne_epsilon, *start, rs) |
---|
| 380 | && rx_nfa_edge (rx, ne_epsilon, le, *end) |
---|
| 381 | && rx_nfa_edge (rx, ne_epsilon, re, *end)); |
---|
| 382 | } |
---|
| 383 | |
---|
| 384 | case r_context: |
---|
| 385 | edge = rx_nfa_edge (rx, ne_side_effect, *start, *end); |
---|
| 386 | if (!edge) |
---|
| 387 | return 0; |
---|
| 388 | edge->params.side_effect = (void *)rexp->params.intval; |
---|
| 389 | return 1; |
---|
| 390 | } |
---|
| 391 | |
---|
| 392 | /* this should never happen */ |
---|
| 393 | return 0; |
---|
| 394 | } |
---|
| 395 | |
---|
| 396 | |
---|
| 397 | /* {Low Level Data Structures for the Static NFA->SuperNFA Analysis} |
---|
| 398 | * |
---|
| 399 | * There are side effect lists -- lists of side effects occuring |
---|
| 400 | * along an uninterrupted, acyclic path of side-effect epsilon edges. |
---|
| 401 | * Such paths are collapsed to single edges in the course of computing |
---|
| 402 | * epsilon closures. The resulting single edges are labled with a list |
---|
| 403 | * of all the side effects from the original multi-edge path. Equivalent |
---|
| 404 | * lists of side effects are made == by the constructors below. |
---|
| 405 | * |
---|
| 406 | * There are also nfa state sets. These are used to hold a list of all |
---|
| 407 | * states reachable from a starting state for a given type of transition |
---|
| 408 | * and side effect list. These are also hash-consed. |
---|
| 409 | */ |
---|
| 410 | |
---|
| 411 | |
---|
| 412 | |
---|
| 413 | /* The next several functions compare, construct, etc. lists of side |
---|
| 414 | * effects. See ECLOSE_NFA (below) for details. |
---|
| 415 | */ |
---|
| 416 | |
---|
| 417 | /* Ordering of rx_se_list |
---|
| 418 | * (-1, 0, 1 return value convention). |
---|
| 419 | */ |
---|
| 420 | |
---|
| 421 | #ifdef __STDC__ |
---|
| 422 | static int |
---|
| 423 | se_list_cmp (void * va, void * vb) |
---|
| 424 | #else |
---|
| 425 | static int |
---|
| 426 | se_list_cmp (va, vb) |
---|
| 427 | void * va; |
---|
| 428 | void * vb; |
---|
| 429 | #endif |
---|
| 430 | { |
---|
| 431 | struct rx_se_list * a = (struct rx_se_list *)va; |
---|
| 432 | struct rx_se_list * b = (struct rx_se_list *)vb; |
---|
| 433 | |
---|
| 434 | return ((va == vb) |
---|
| 435 | ? 0 |
---|
| 436 | : (!va |
---|
| 437 | ? -1 |
---|
| 438 | : (!vb |
---|
| 439 | ? 1 |
---|
| 440 | : ((long)a->car < (long)b->car |
---|
| 441 | ? 1 |
---|
| 442 | : ((long)a->car > (long)b->car |
---|
| 443 | ? -1 |
---|
| 444 | : se_list_cmp ((void *)a->cdr, (void *)b->cdr)))))); |
---|
| 445 | } |
---|
| 446 | |
---|
| 447 | |
---|
| 448 | #ifdef __STDC__ |
---|
| 449 | static int |
---|
| 450 | se_list_equal (void * va, void * vb) |
---|
| 451 | #else |
---|
| 452 | static int |
---|
| 453 | se_list_equal (va, vb) |
---|
| 454 | void * va; |
---|
| 455 | void * vb; |
---|
| 456 | #endif |
---|
| 457 | { |
---|
| 458 | return !(se_list_cmp (va, vb)); |
---|
| 459 | } |
---|
| 460 | |
---|
| 461 | static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 }; |
---|
| 462 | |
---|
| 463 | |
---|
| 464 | #ifdef __STDC__ |
---|
| 465 | static struct rx_se_list * |
---|
| 466 | side_effect_cons (struct rx * rx, |
---|
| 467 | void * se, struct rx_se_list * list) |
---|
| 468 | #else |
---|
| 469 | static struct rx_se_list * |
---|
| 470 | side_effect_cons (rx, se, list) |
---|
| 471 | struct rx * rx; |
---|
| 472 | void * se; |
---|
| 473 | struct rx_se_list * list; |
---|
| 474 | #endif |
---|
| 475 | { |
---|
| 476 | struct rx_se_list * l; |
---|
| 477 | l = ((struct rx_se_list *) malloc (sizeof (*l))); |
---|
| 478 | if (!l) |
---|
| 479 | return 0; |
---|
| 480 | l->car = se; |
---|
| 481 | l->cdr = list; |
---|
| 482 | return l; |
---|
| 483 | } |
---|
| 484 | |
---|
| 485 | |
---|
| 486 | #ifdef __STDC__ |
---|
| 487 | static struct rx_se_list * |
---|
| 488 | hash_cons_se_prog (struct rx * rx, |
---|
| 489 | struct rx_hash * memo, |
---|
| 490 | void * car, struct rx_se_list * cdr) |
---|
| 491 | #else |
---|
| 492 | static struct rx_se_list * |
---|
| 493 | hash_cons_se_prog (rx, memo, car, cdr) |
---|
| 494 | struct rx * rx; |
---|
| 495 | struct rx_hash * memo; |
---|
| 496 | void * car; |
---|
| 497 | struct rx_se_list * cdr; |
---|
| 498 | #endif |
---|
| 499 | { |
---|
| 500 | long hash = (long)car ^ (long)cdr; |
---|
| 501 | struct rx_se_list template; |
---|
| 502 | |
---|
| 503 | template.car = car; |
---|
| 504 | template.cdr = cdr; |
---|
| 505 | { |
---|
| 506 | struct rx_hash_item * it = rx_hash_store (memo, hash, |
---|
| 507 | (void *)&template, |
---|
| 508 | &se_list_hash_rules); |
---|
| 509 | if (!it) |
---|
| 510 | return 0; |
---|
| 511 | if (it->data == (void *)&template) |
---|
| 512 | { |
---|
| 513 | struct rx_se_list * consed; |
---|
| 514 | consed = (struct rx_se_list *) malloc (sizeof (*consed)); |
---|
| 515 | *consed = template; |
---|
| 516 | it->data = (void *)consed; |
---|
| 517 | } |
---|
| 518 | return (struct rx_se_list *)it->data; |
---|
| 519 | } |
---|
| 520 | } |
---|
| 521 | |
---|
| 522 | |
---|
| 523 | #ifdef __STDC__ |
---|
| 524 | static struct rx_se_list * |
---|
| 525 | hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog) |
---|
| 526 | #else |
---|
| 527 | static struct rx_se_list * |
---|
| 528 | hash_se_prog (rx, memo, prog) |
---|
| 529 | struct rx * rx; |
---|
| 530 | struct rx_hash * memo; |
---|
| 531 | struct rx_se_list * prog; |
---|
| 532 | #endif |
---|
| 533 | { |
---|
| 534 | struct rx_se_list * answer = 0; |
---|
| 535 | while (prog) |
---|
| 536 | { |
---|
| 537 | answer = hash_cons_se_prog (rx, memo, prog->car, answer); |
---|
| 538 | if (!answer) |
---|
| 539 | return 0; |
---|
| 540 | prog = prog->cdr; |
---|
| 541 | } |
---|
| 542 | return answer; |
---|
| 543 | } |
---|
| 544 | |
---|
| 545 | |
---|
| 546 | |
---|
| 547 | /* {Constructors, etc. for NFA State Sets} |
---|
| 548 | */ |
---|
| 549 | |
---|
| 550 | #ifdef __STDC__ |
---|
| 551 | static int |
---|
| 552 | nfa_set_cmp (void * va, void * vb) |
---|
| 553 | #else |
---|
| 554 | static int |
---|
| 555 | nfa_set_cmp (va, vb) |
---|
| 556 | void * va; |
---|
| 557 | void * vb; |
---|
| 558 | #endif |
---|
| 559 | { |
---|
| 560 | struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va; |
---|
| 561 | struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb; |
---|
| 562 | |
---|
| 563 | return ((va == vb) |
---|
| 564 | ? 0 |
---|
| 565 | : (!va |
---|
| 566 | ? -1 |
---|
| 567 | : (!vb |
---|
| 568 | ? 1 |
---|
| 569 | : (a->car->id < b->car->id |
---|
| 570 | ? 1 |
---|
| 571 | : (a->car->id > b->car->id |
---|
| 572 | ? -1 |
---|
| 573 | : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr)))))); |
---|
| 574 | } |
---|
| 575 | |
---|
| 576 | #ifdef __STDC__ |
---|
| 577 | static int |
---|
| 578 | nfa_set_equal (void * va, void * vb) |
---|
| 579 | #else |
---|
| 580 | static int |
---|
| 581 | nfa_set_equal (va, vb) |
---|
| 582 | void * va; |
---|
| 583 | void * vb; |
---|
| 584 | #endif |
---|
| 585 | { |
---|
| 586 | return !nfa_set_cmp (va, vb); |
---|
| 587 | } |
---|
| 588 | |
---|
| 589 | static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 }; |
---|
| 590 | |
---|
| 591 | |
---|
| 592 | #ifdef __STDC__ |
---|
| 593 | static struct rx_nfa_state_set * |
---|
| 594 | nfa_set_cons (struct rx * rx, |
---|
| 595 | struct rx_hash * memo, struct rx_nfa_state * state, |
---|
| 596 | struct rx_nfa_state_set * set) |
---|
| 597 | #else |
---|
| 598 | static struct rx_nfa_state_set * |
---|
| 599 | nfa_set_cons (rx, memo, state, set) |
---|
| 600 | struct rx * rx; |
---|
| 601 | struct rx_hash * memo; |
---|
| 602 | struct rx_nfa_state * state; |
---|
| 603 | struct rx_nfa_state_set * set; |
---|
| 604 | #endif |
---|
| 605 | { |
---|
| 606 | struct rx_nfa_state_set template; |
---|
| 607 | struct rx_hash_item * node; |
---|
| 608 | template.car = state; |
---|
| 609 | template.cdr = set; |
---|
| 610 | node = rx_hash_store (memo, |
---|
| 611 | (((long)state) >> 8) ^ (long)set, |
---|
| 612 | &template, &nfa_set_hash_rules); |
---|
| 613 | if (!node) |
---|
| 614 | return 0; |
---|
| 615 | if (node->data == &template) |
---|
| 616 | { |
---|
| 617 | struct rx_nfa_state_set * l; |
---|
| 618 | l = (struct rx_nfa_state_set *) malloc (sizeof (*l)); |
---|
| 619 | node->data = (void *) l; |
---|
| 620 | if (!l) |
---|
| 621 | return 0; |
---|
| 622 | *l = template; |
---|
| 623 | } |
---|
| 624 | return (struct rx_nfa_state_set *)node->data; |
---|
| 625 | } |
---|
| 626 | |
---|
| 627 | |
---|
| 628 | #ifdef __STDC__ |
---|
| 629 | static struct rx_nfa_state_set * |
---|
| 630 | nfa_set_enjoin (struct rx * rx, |
---|
| 631 | struct rx_hash * memo, struct rx_nfa_state * state, |
---|
| 632 | struct rx_nfa_state_set * set) |
---|
| 633 | #else |
---|
| 634 | static struct rx_nfa_state_set * |
---|
| 635 | nfa_set_enjoin (rx, memo, state, set) |
---|
| 636 | struct rx * rx; |
---|
| 637 | struct rx_hash * memo; |
---|
| 638 | struct rx_nfa_state * state; |
---|
| 639 | struct rx_nfa_state_set * set; |
---|
| 640 | #endif |
---|
| 641 | { |
---|
| 642 | if (!set || (state->id < set->car->id)) |
---|
| 643 | return nfa_set_cons (rx, memo, state, set); |
---|
| 644 | if (state->id == set->car->id) |
---|
| 645 | return set; |
---|
| 646 | else |
---|
| 647 | { |
---|
| 648 | struct rx_nfa_state_set * newcdr |
---|
| 649 | = nfa_set_enjoin (rx, memo, state, set->cdr); |
---|
| 650 | if (newcdr != set->cdr) |
---|
| 651 | set = nfa_set_cons (rx, memo, set->car, newcdr); |
---|
| 652 | return set; |
---|
| 653 | } |
---|
| 654 | } |
---|
| 655 | |
---|
| 656 | |
---|
| 657 | /* {Computing Epsilon/Side-effect Closures.} |
---|
| 658 | */ |
---|
| 659 | |
---|
| 660 | struct eclose_frame |
---|
| 661 | { |
---|
| 662 | struct rx_se_list *prog_backwards; |
---|
| 663 | }; |
---|
| 664 | |
---|
| 665 | |
---|
| 666 | /* This is called while computing closures for "outnode". |
---|
| 667 | * The current node in the traversal is "node". |
---|
| 668 | * "frame" contains a list of a all side effects between |
---|
| 669 | * "outnode" and "node" from most to least recent. |
---|
| 670 | * |
---|
| 671 | * Explores forward from "node", adding new possible |
---|
| 672 | * futures to outnode. |
---|
| 673 | * |
---|
| 674 | * Returns 0 on allocation failure. |
---|
| 675 | */ |
---|
| 676 | |
---|
| 677 | #ifdef __STDC__ |
---|
| 678 | static int |
---|
| 679 | eclose_node (struct rx *rx, struct rx_nfa_state *outnode, |
---|
| 680 | struct rx_nfa_state *node, struct eclose_frame *frame) |
---|
| 681 | #else |
---|
| 682 | static int |
---|
| 683 | eclose_node (rx, outnode, node, frame) |
---|
| 684 | struct rx *rx; |
---|
| 685 | struct rx_nfa_state *outnode; |
---|
| 686 | struct rx_nfa_state *node; |
---|
| 687 | struct eclose_frame *frame; |
---|
| 688 | #endif |
---|
| 689 | { |
---|
| 690 | struct rx_nfa_edge *e = node->edges; |
---|
| 691 | |
---|
| 692 | /* For each node, we follow all epsilon paths to build the closure. |
---|
| 693 | * The closure omits nodes that have only epsilon edges. |
---|
| 694 | * The closure is split into partial closures -- all the states in |
---|
| 695 | * a partial closure are reached by crossing the same list of |
---|
| 696 | * of side effects (though not necessarily the same path). |
---|
| 697 | */ |
---|
| 698 | if (node->mark) |
---|
| 699 | return 1; |
---|
| 700 | node->mark = 1; |
---|
| 701 | |
---|
| 702 | /* If "node" has more than just epsilon and |
---|
| 703 | * and side-effect transitions (id >= 0), or is final, |
---|
| 704 | * then it has to be added to the possible futures |
---|
| 705 | * of "outnode". |
---|
| 706 | */ |
---|
| 707 | if (node->id >= 0 || node->is_final) |
---|
| 708 | { |
---|
| 709 | struct rx_possible_future **ec; |
---|
| 710 | struct rx_se_list * prog_in_order; |
---|
| 711 | int cmp; |
---|
| 712 | |
---|
| 713 | prog_in_order = ((struct rx_se_list *)hash_se_prog (rx, |
---|
| 714 | &rx->se_list_memo, |
---|
| 715 | frame->prog_backwards)); |
---|
| 716 | |
---|
| 717 | ec = &outnode->futures; |
---|
| 718 | |
---|
| 719 | while (*ec) |
---|
| 720 | { |
---|
| 721 | cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order); |
---|
| 722 | if (cmp <= 0) |
---|
| 723 | break; |
---|
| 724 | ec = &(*ec)->next; |
---|
| 725 | } |
---|
| 726 | |
---|
| 727 | if (!*ec || (cmp < 0)) |
---|
| 728 | { |
---|
| 729 | struct rx_possible_future * pf; |
---|
| 730 | pf = rx_possible_future (rx, prog_in_order); |
---|
| 731 | if (!pf) |
---|
| 732 | return 0; |
---|
| 733 | pf->next = *ec; |
---|
| 734 | *ec = pf; |
---|
| 735 | } |
---|
| 736 | if (node->id >= 0) |
---|
| 737 | { |
---|
| 738 | (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo, |
---|
| 739 | node, (*ec)->destset); |
---|
| 740 | if (!(*ec)->destset) |
---|
| 741 | return 0; |
---|
| 742 | } |
---|
| 743 | } |
---|
| 744 | |
---|
| 745 | /* Recurse on outgoing epsilon and side effect nodes. |
---|
| 746 | */ |
---|
| 747 | while (e) |
---|
| 748 | { |
---|
| 749 | switch (e->type) |
---|
| 750 | { |
---|
| 751 | case ne_epsilon: |
---|
| 752 | if (!eclose_node (rx, outnode, e->dest, frame)) |
---|
| 753 | return 0; |
---|
| 754 | break; |
---|
| 755 | case ne_side_effect: |
---|
| 756 | { |
---|
| 757 | frame->prog_backwards = side_effect_cons (rx, |
---|
| 758 | e->params.side_effect, |
---|
| 759 | frame->prog_backwards); |
---|
| 760 | if (!frame->prog_backwards) |
---|
| 761 | return 0; |
---|
| 762 | if (!eclose_node (rx, outnode, e->dest, frame)) |
---|
| 763 | return 0; |
---|
| 764 | { |
---|
| 765 | struct rx_se_list * dying = frame->prog_backwards; |
---|
| 766 | frame->prog_backwards = frame->prog_backwards->cdr; |
---|
| 767 | free ((char *)dying); |
---|
| 768 | } |
---|
| 769 | break; |
---|
| 770 | } |
---|
| 771 | default: |
---|
| 772 | break; |
---|
| 773 | } |
---|
| 774 | e = e->next; |
---|
| 775 | } |
---|
| 776 | node->mark = 0; |
---|
| 777 | return 1; |
---|
| 778 | } |
---|
| 779 | |
---|
| 780 | |
---|
| 781 | #ifdef __STDC__ |
---|
| 782 | struct rx_possible_future * |
---|
| 783 | rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n) |
---|
| 784 | #else |
---|
| 785 | struct rx_possible_future * |
---|
| 786 | rx_state_possible_futures (rx, n) |
---|
| 787 | struct rx * rx; |
---|
| 788 | struct rx_nfa_state * n; |
---|
| 789 | #endif |
---|
| 790 | { |
---|
| 791 | if (n->futures_computed) |
---|
| 792 | return n->futures; |
---|
| 793 | else |
---|
| 794 | { |
---|
| 795 | struct eclose_frame frame; |
---|
| 796 | frame.prog_backwards = 0; |
---|
| 797 | if (!eclose_node (rx, n, n, &frame)) |
---|
| 798 | return 0; |
---|
| 799 | else |
---|
| 800 | { |
---|
| 801 | n->futures_computed = 1; |
---|
| 802 | return n->futures; |
---|
| 803 | } |
---|
| 804 | } |
---|
| 805 | } |
---|
| 806 | |
---|
| 807 | |
---|
| 808 | |
---|
| 809 | /* {Storing the NFA in a Contiguous Region of Memory} |
---|
| 810 | */ |
---|
| 811 | |
---|
| 812 | |
---|
| 813 | |
---|
| 814 | #ifdef __STDC__ |
---|
| 815 | static void |
---|
| 816 | se_memo_freer (struct rx_hash_item * node) |
---|
| 817 | #else |
---|
| 818 | static void |
---|
| 819 | se_memo_freer (node) |
---|
| 820 | struct rx_hash_item * node; |
---|
| 821 | #endif |
---|
| 822 | { |
---|
| 823 | free ((char *)node->data); |
---|
| 824 | } |
---|
| 825 | |
---|
| 826 | |
---|
| 827 | #ifdef __STDC__ |
---|
| 828 | static void |
---|
| 829 | nfa_set_freer (struct rx_hash_item * node) |
---|
| 830 | #else |
---|
| 831 | static void |
---|
| 832 | nfa_set_freer (node) |
---|
| 833 | struct rx_hash_item * node; |
---|
| 834 | #endif |
---|
| 835 | { |
---|
| 836 | free ((char *)node->data); |
---|
| 837 | } |
---|
| 838 | |
---|
| 839 | #ifdef __STDC__ |
---|
| 840 | void |
---|
| 841 | rx_free_nfa (struct rx *rx) |
---|
| 842 | #else |
---|
| 843 | void |
---|
| 844 | rx_free_nfa (rx) |
---|
| 845 | struct rx *rx; |
---|
| 846 | #endif |
---|
| 847 | { |
---|
| 848 | rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules); |
---|
| 849 | rx_bzero ((char *)&rx->se_list_memo, sizeof (rx->se_list_memo)); |
---|
| 850 | rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules); |
---|
| 851 | rx_bzero ((char *)&rx->set_list_memo, sizeof (rx->set_list_memo)); |
---|
| 852 | rx_free_nfa_graph (rx); |
---|
| 853 | } |
---|