1 | |
---|
2 | |
---|
3 | /* Copyright (C) 1995, 1996 Tom Lord |
---|
4 | * |
---|
5 | * This program is free software; you can redistribute it and/or modify |
---|
6 | * it under the terms of the GNU Library General Public License as published by |
---|
7 | * the Free Software Foundation; either version 2, or (at your option) |
---|
8 | * any later version. |
---|
9 | * |
---|
10 | * This program is distributed in the hope that it will be useful, |
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
13 | * GNU Library General Public License for more details. |
---|
14 | * |
---|
15 | * You should have received a copy of the GNU Library General Public License |
---|
16 | * along with this software; see the file COPYING. If not, write to |
---|
17 | * the Free Software Foundation, 59 Temple Place - Suite 330, |
---|
18 | * Boston, MA 02111-1307, USA. |
---|
19 | */ |
---|
20 | |
---|
21 | |
---|
22 | |
---|
23 | #include "rxall.h" |
---|
24 | #include "rxsuper.h" |
---|
25 | |
---|
26 | |
---|
27 | /* The functions in the next several pages define the lazy-NFA-conversion used |
---|
28 | * by matchers. The input to this construction is an NFA such as |
---|
29 | * is built by compactify_nfa (rx.c). The output is the superNFA. |
---|
30 | */ |
---|
31 | |
---|
32 | /* Match engines can use arbitrary values for opcodes. So, the parse tree |
---|
33 | * is built using instructions names (enum rx_opcode), but the superstate |
---|
34 | * nfa is populated with mystery opcodes (void *). |
---|
35 | * |
---|
36 | * For convenience, here is an id table. The opcodes are == to their inxs |
---|
37 | * |
---|
38 | * The lables in re_search_2 would make good values for instructions. |
---|
39 | */ |
---|
40 | |
---|
41 | void * rx_id_instruction_table[rx_num_instructions] = |
---|
42 | { |
---|
43 | (void *) rx_backtrack_point, |
---|
44 | (void *) rx_do_side_effects, |
---|
45 | (void *) rx_cache_miss, |
---|
46 | (void *) rx_next_char, |
---|
47 | (void *) rx_backtrack, |
---|
48 | (void *) rx_error_inx |
---|
49 | }; |
---|
50 | |
---|
51 | |
---|
52 | |
---|
53 | |
---|
54 | /* The partially instantiated superstate graph has a transition |
---|
55 | * table at every node. There is one entry for every character. |
---|
56 | * This fills in the transition for a set. |
---|
57 | */ |
---|
58 | #ifdef __STDC__ |
---|
59 | static void |
---|
60 | install_transition (struct rx_superstate *super, |
---|
61 | struct rx_inx *answer, rx_Bitset trcset) |
---|
62 | #else |
---|
63 | static void |
---|
64 | install_transition (super, answer, trcset) |
---|
65 | struct rx_superstate *super; |
---|
66 | struct rx_inx *answer; |
---|
67 | rx_Bitset trcset; |
---|
68 | #endif |
---|
69 | { |
---|
70 | struct rx_inx * transitions = super->transitions; |
---|
71 | int chr; |
---|
72 | for (chr = 0; chr < 256; ) |
---|
73 | if (!*trcset) |
---|
74 | { |
---|
75 | ++trcset; |
---|
76 | chr += 32; |
---|
77 | } |
---|
78 | else |
---|
79 | { |
---|
80 | RX_subset sub = *trcset; |
---|
81 | RX_subset mask = 1; |
---|
82 | int bound = chr + 32; |
---|
83 | while (chr < bound) |
---|
84 | { |
---|
85 | if (sub & mask) |
---|
86 | transitions [chr] = *answer; |
---|
87 | ++chr; |
---|
88 | mask <<= 1; |
---|
89 | } |
---|
90 | ++trcset; |
---|
91 | } |
---|
92 | } |
---|
93 | |
---|
94 | |
---|
95 | #ifdef __STDC__ |
---|
96 | static int |
---|
97 | qlen (struct rx_superstate * q) |
---|
98 | #else |
---|
99 | static int |
---|
100 | qlen (q) |
---|
101 | struct rx_superstate * q; |
---|
102 | #endif |
---|
103 | { |
---|
104 | int count = 1; |
---|
105 | struct rx_superstate * it; |
---|
106 | if (!q) |
---|
107 | return 0; |
---|
108 | for (it = q->next_recyclable; it != q; it = it->next_recyclable) |
---|
109 | ++count; |
---|
110 | return count; |
---|
111 | } |
---|
112 | |
---|
113 | #ifdef __STDC__ |
---|
114 | static void |
---|
115 | check_cache (struct rx_cache * cache) |
---|
116 | #else |
---|
117 | static void |
---|
118 | check_cache (cache) |
---|
119 | struct rx_cache * cache; |
---|
120 | #endif |
---|
121 | { |
---|
122 | struct rx_cache * you_fucked_up = 0; |
---|
123 | int total = cache->superstates; |
---|
124 | int semi = cache->semifree_superstates; |
---|
125 | if (semi != qlen (cache->semifree_superstate)) |
---|
126 | check_cache (you_fucked_up); |
---|
127 | if ((total - semi) != qlen (cache->lru_superstate)) |
---|
128 | check_cache (you_fucked_up); |
---|
129 | } |
---|
130 | #ifdef __STDC__ |
---|
131 | char * |
---|
132 | rx_cache_malloc (struct rx_cache * cache, int size) |
---|
133 | #else |
---|
134 | char * |
---|
135 | rx_cache_malloc (cache, size) |
---|
136 | struct rx_cache * cache; |
---|
137 | int size; |
---|
138 | #endif |
---|
139 | { |
---|
140 | char * answer; |
---|
141 | answer = (char *)malloc (size); |
---|
142 | if (answer) |
---|
143 | cache->bytes_used += size; |
---|
144 | return answer; |
---|
145 | } |
---|
146 | |
---|
147 | |
---|
148 | #ifdef __STDC__ |
---|
149 | void |
---|
150 | rx_cache_free (struct rx_cache * cache, int size, char * mem) |
---|
151 | #else |
---|
152 | void |
---|
153 | rx_cache_free (cache, size, mem) |
---|
154 | struct rx_cache * cache; |
---|
155 | int size; |
---|
156 | char * mem; |
---|
157 | #endif |
---|
158 | { |
---|
159 | free (mem); |
---|
160 | cache->bytes_used -= size; |
---|
161 | } |
---|
162 | |
---|
163 | |
---|
164 | |
---|
165 | /* When a superstate is old and neglected, it can enter a |
---|
166 | * semi-free state. A semi-free state is slated to die. |
---|
167 | * Incoming transitions to a semi-free state are re-written |
---|
168 | * to cause an (interpreted) fault when they are taken. |
---|
169 | * The fault handler revives the semi-free state, patches |
---|
170 | * incoming transitions back to normal, and continues. |
---|
171 | * |
---|
172 | * The idea is basicly to free in two stages, aborting |
---|
173 | * between the two if the state turns out to be useful again. |
---|
174 | * When a free is aborted, the rescued superstate is placed |
---|
175 | * in the most-favored slot to maximize the time until it |
---|
176 | * is next semi-freed. |
---|
177 | * |
---|
178 | * Overall, this approximates truly freeing superstates in |
---|
179 | * lru order, but does not involve the cost of updating perfectly |
---|
180 | * accurate timestamps every time a superstate is touched. |
---|
181 | */ |
---|
182 | |
---|
183 | #ifdef __STDC__ |
---|
184 | static void |
---|
185 | semifree_superstate (struct rx_cache * cache) |
---|
186 | #else |
---|
187 | static void |
---|
188 | semifree_superstate (cache) |
---|
189 | struct rx_cache * cache; |
---|
190 | #endif |
---|
191 | { |
---|
192 | int disqualified = cache->semifree_superstates; |
---|
193 | if (disqualified == cache->superstates) |
---|
194 | return; |
---|
195 | while (cache->lru_superstate->locks) |
---|
196 | { |
---|
197 | cache->lru_superstate = cache->lru_superstate->next_recyclable; |
---|
198 | ++disqualified; |
---|
199 | if (disqualified == cache->superstates) |
---|
200 | return; |
---|
201 | } |
---|
202 | { |
---|
203 | struct rx_superstate * it = cache->lru_superstate; |
---|
204 | it->next_recyclable->prev_recyclable = it->prev_recyclable; |
---|
205 | it->prev_recyclable->next_recyclable = it->next_recyclable; |
---|
206 | cache->lru_superstate = (it == it->next_recyclable |
---|
207 | ? 0 |
---|
208 | : it->next_recyclable); |
---|
209 | if (!cache->semifree_superstate) |
---|
210 | { |
---|
211 | cache->semifree_superstate = it; |
---|
212 | it->next_recyclable = it; |
---|
213 | it->prev_recyclable = it; |
---|
214 | } |
---|
215 | else |
---|
216 | { |
---|
217 | it->prev_recyclable = cache->semifree_superstate->prev_recyclable; |
---|
218 | it->next_recyclable = cache->semifree_superstate; |
---|
219 | it->prev_recyclable->next_recyclable = it; |
---|
220 | it->next_recyclable->prev_recyclable = it; |
---|
221 | } |
---|
222 | { |
---|
223 | struct rx_distinct_future *df; |
---|
224 | it->is_semifree = 1; |
---|
225 | ++cache->semifree_superstates; |
---|
226 | df = it->transition_refs; |
---|
227 | if (df) |
---|
228 | { |
---|
229 | df->prev_same_dest->next_same_dest = 0; |
---|
230 | for (df = it->transition_refs; df; df = df->next_same_dest) |
---|
231 | { |
---|
232 | df->future_frame.inx = cache->instruction_table[rx_cache_miss]; |
---|
233 | df->future_frame.data = 0; |
---|
234 | df->future_frame.data_2 = (void *) df; |
---|
235 | /* If there are any NEXT-CHAR instruction frames that |
---|
236 | * refer to this state, we convert them to CACHE-MISS frames. |
---|
237 | */ |
---|
238 | if (!df->effects |
---|
239 | && (df->edge->options->next_same_super_edge[0] |
---|
240 | == df->edge->options)) |
---|
241 | install_transition (df->present, &df->future_frame, |
---|
242 | df->edge->cset); |
---|
243 | } |
---|
244 | df = it->transition_refs; |
---|
245 | df->prev_same_dest->next_same_dest = df; |
---|
246 | } |
---|
247 | } |
---|
248 | } |
---|
249 | } |
---|
250 | |
---|
251 | |
---|
252 | #ifdef __STDC__ |
---|
253 | static void |
---|
254 | refresh_semifree_superstate (struct rx_cache * cache, |
---|
255 | struct rx_superstate * super) |
---|
256 | #else |
---|
257 | static void |
---|
258 | refresh_semifree_superstate (cache, super) |
---|
259 | struct rx_cache * cache; |
---|
260 | struct rx_superstate * super; |
---|
261 | #endif |
---|
262 | { |
---|
263 | struct rx_distinct_future *df; |
---|
264 | |
---|
265 | if (super->transition_refs) |
---|
266 | { |
---|
267 | super->transition_refs->prev_same_dest->next_same_dest = 0; |
---|
268 | for (df = super->transition_refs; df; df = df->next_same_dest) |
---|
269 | { |
---|
270 | df->future_frame.inx = cache->instruction_table[rx_next_char]; |
---|
271 | df->future_frame.data = (void *) super->transitions; |
---|
272 | df->future_frame.data_2 = (void *)(super->contents->is_final); |
---|
273 | /* CACHE-MISS instruction frames that refer to this state, |
---|
274 | * must be converted to NEXT-CHAR frames. |
---|
275 | */ |
---|
276 | if (!df->effects |
---|
277 | && (df->edge->options->next_same_super_edge[0] |
---|
278 | == df->edge->options)) |
---|
279 | install_transition (df->present, &df->future_frame, |
---|
280 | df->edge->cset); |
---|
281 | } |
---|
282 | super->transition_refs->prev_same_dest->next_same_dest |
---|
283 | = super->transition_refs; |
---|
284 | } |
---|
285 | if (cache->semifree_superstate == super) |
---|
286 | cache->semifree_superstate = (super->prev_recyclable == super |
---|
287 | ? 0 |
---|
288 | : super->prev_recyclable); |
---|
289 | super->next_recyclable->prev_recyclable = super->prev_recyclable; |
---|
290 | super->prev_recyclable->next_recyclable = super->next_recyclable; |
---|
291 | |
---|
292 | if (!cache->lru_superstate) |
---|
293 | (cache->lru_superstate |
---|
294 | = super->next_recyclable |
---|
295 | = super->prev_recyclable |
---|
296 | = super); |
---|
297 | else |
---|
298 | { |
---|
299 | super->next_recyclable = cache->lru_superstate; |
---|
300 | super->prev_recyclable = cache->lru_superstate->prev_recyclable; |
---|
301 | super->next_recyclable->prev_recyclable = super; |
---|
302 | super->prev_recyclable->next_recyclable = super; |
---|
303 | } |
---|
304 | super->is_semifree = 0; |
---|
305 | --cache->semifree_superstates; |
---|
306 | } |
---|
307 | |
---|
308 | #ifdef __STDC__ |
---|
309 | void |
---|
310 | rx_refresh_this_superstate (struct rx_cache * cache, |
---|
311 | struct rx_superstate * superstate) |
---|
312 | #else |
---|
313 | void |
---|
314 | rx_refresh_this_superstate (cache, superstate) |
---|
315 | struct rx_cache * cache; |
---|
316 | struct rx_superstate * superstate; |
---|
317 | #endif |
---|
318 | { |
---|
319 | if (superstate->is_semifree) |
---|
320 | refresh_semifree_superstate (cache, superstate); |
---|
321 | else if (cache->lru_superstate == superstate) |
---|
322 | cache->lru_superstate = superstate->next_recyclable; |
---|
323 | else if (superstate != cache->lru_superstate->prev_recyclable) |
---|
324 | { |
---|
325 | superstate->next_recyclable->prev_recyclable |
---|
326 | = superstate->prev_recyclable; |
---|
327 | superstate->prev_recyclable->next_recyclable |
---|
328 | = superstate->next_recyclable; |
---|
329 | superstate->next_recyclable = cache->lru_superstate; |
---|
330 | superstate->prev_recyclable = cache->lru_superstate->prev_recyclable; |
---|
331 | superstate->next_recyclable->prev_recyclable = superstate; |
---|
332 | superstate->prev_recyclable->next_recyclable = superstate; |
---|
333 | } |
---|
334 | } |
---|
335 | |
---|
336 | #ifdef __STDC__ |
---|
337 | static void |
---|
338 | release_superset_low (struct rx_cache * cache, |
---|
339 | struct rx_superset *set) |
---|
340 | #else |
---|
341 | static void |
---|
342 | release_superset_low (cache, set) |
---|
343 | struct rx_cache * cache; |
---|
344 | struct rx_superset *set; |
---|
345 | #endif |
---|
346 | { |
---|
347 | if (!--set->refs) |
---|
348 | { |
---|
349 | if (set->starts_for) |
---|
350 | set->starts_for->start_set = 0; |
---|
351 | |
---|
352 | if (set->cdr) |
---|
353 | release_superset_low (cache, set->cdr); |
---|
354 | |
---|
355 | rx_hash_free (&set->hash_item, &cache->superset_hash_rules); |
---|
356 | rx_cache_free (cache, |
---|
357 | sizeof (struct rx_superset), |
---|
358 | (char *)set); |
---|
359 | } |
---|
360 | } |
---|
361 | |
---|
362 | #ifdef __STDC__ |
---|
363 | void |
---|
364 | rx_release_superset (struct rx *rx, |
---|
365 | struct rx_superset *set) |
---|
366 | #else |
---|
367 | void |
---|
368 | rx_release_superset (rx, set) |
---|
369 | struct rx *rx; |
---|
370 | struct rx_superset *set; |
---|
371 | #endif |
---|
372 | { |
---|
373 | release_superset_low (rx->cache, set); |
---|
374 | } |
---|
375 | |
---|
376 | /* This tries to add a new superstate to the superstate freelist. |
---|
377 | * It might, as a result, free some edge pieces or hash tables. |
---|
378 | * If nothing can be freed because too many locks are being held, fail. |
---|
379 | */ |
---|
380 | |
---|
381 | #ifdef __STDC__ |
---|
382 | static int |
---|
383 | rx_really_free_superstate (struct rx_cache * cache) |
---|
384 | #else |
---|
385 | static int |
---|
386 | rx_really_free_superstate (cache) |
---|
387 | struct rx_cache * cache; |
---|
388 | #endif |
---|
389 | { |
---|
390 | int locked_superstates = 0; |
---|
391 | struct rx_superstate * it; |
---|
392 | |
---|
393 | if (!cache->superstates) |
---|
394 | return 0; |
---|
395 | |
---|
396 | /* scale these */ |
---|
397 | while ((cache->hits + cache->misses) > cache->superstates) |
---|
398 | { |
---|
399 | cache->hits >>= 1; |
---|
400 | cache->misses >>= 1; |
---|
401 | } |
---|
402 | |
---|
403 | /* semifree superstates faster than they are freed |
---|
404 | * so popular states can be rescued. |
---|
405 | */ |
---|
406 | semifree_superstate (cache); |
---|
407 | semifree_superstate (cache); |
---|
408 | semifree_superstate (cache); |
---|
409 | |
---|
410 | #if 0 |
---|
411 | Redundant because semifree_superstate already |
---|
412 | makes this check; |
---|
413 | |
---|
414 | |
---|
415 | while (cache->semifree_superstate && cache->semifree_superstate->locks) |
---|
416 | { |
---|
417 | refresh_semifree_superstate (cache, cache->semifree_superstate); |
---|
418 | ++locked_superstates; |
---|
419 | if (locked_superstates == cache->superstates) |
---|
420 | return 0; |
---|
421 | } |
---|
422 | |
---|
423 | |
---|
424 | if (cache->semifree_superstate) |
---|
425 | insert the "it =" block which follows this "if 0" code; |
---|
426 | else |
---|
427 | { |
---|
428 | while (cache->lru_superstate->locks) |
---|
429 | { |
---|
430 | cache->lru_superstate = cache->lru_superstate->next_recyclable; |
---|
431 | ++locked_superstates; |
---|
432 | if (locked_superstates == cache->superstates) |
---|
433 | return 0; |
---|
434 | } |
---|
435 | it = cache->lru_superstate; |
---|
436 | it->next_recyclable->prev_recyclable = it->prev_recyclable; |
---|
437 | it->prev_recyclable->next_recyclable = it->next_recyclable; |
---|
438 | cache->lru_superstate = ((it == it->next_recyclable) |
---|
439 | ? 0 |
---|
440 | : it->next_recyclable); |
---|
441 | } |
---|
442 | #endif |
---|
443 | |
---|
444 | if (!cache->semifree_superstate) |
---|
445 | return 0; |
---|
446 | |
---|
447 | { |
---|
448 | it = cache->semifree_superstate; |
---|
449 | it->next_recyclable->prev_recyclable = it->prev_recyclable; |
---|
450 | it->prev_recyclable->next_recyclable = it->next_recyclable; |
---|
451 | cache->semifree_superstate = ((it == it->next_recyclable) |
---|
452 | ? 0 |
---|
453 | : it->next_recyclable); |
---|
454 | --cache->semifree_superstates; |
---|
455 | } |
---|
456 | |
---|
457 | |
---|
458 | if (it->transition_refs) |
---|
459 | { |
---|
460 | struct rx_distinct_future *df; |
---|
461 | for (df = it->transition_refs, |
---|
462 | df->prev_same_dest->next_same_dest = 0; |
---|
463 | df; |
---|
464 | df = df->next_same_dest) |
---|
465 | { |
---|
466 | df->future_frame.inx = cache->instruction_table[rx_cache_miss]; |
---|
467 | df->future_frame.data = 0; |
---|
468 | df->future_frame.data_2 = (void *) df; |
---|
469 | df->future = 0; |
---|
470 | } |
---|
471 | it->transition_refs->prev_same_dest->next_same_dest = |
---|
472 | it->transition_refs; |
---|
473 | } |
---|
474 | { |
---|
475 | struct rx_super_edge *tc = it->edges; |
---|
476 | while (tc) |
---|
477 | { |
---|
478 | struct rx_distinct_future * df; |
---|
479 | struct rx_super_edge *tct = tc->next; |
---|
480 | df = tc->options; |
---|
481 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; |
---|
482 | while (df) |
---|
483 | { |
---|
484 | struct rx_distinct_future *dft = df; |
---|
485 | df = df->next_same_super_edge[0]; |
---|
486 | |
---|
487 | |
---|
488 | if (dft->future && dft->future->transition_refs == dft) |
---|
489 | { |
---|
490 | dft->future->transition_refs = dft->next_same_dest; |
---|
491 | if (dft->future->transition_refs == dft) |
---|
492 | dft->future->transition_refs = 0; |
---|
493 | } |
---|
494 | dft->next_same_dest->prev_same_dest = dft->prev_same_dest; |
---|
495 | dft->prev_same_dest->next_same_dest = dft->next_same_dest; |
---|
496 | rx_cache_free (cache, |
---|
497 | sizeof (struct rx_distinct_future), |
---|
498 | (char *)dft); |
---|
499 | } |
---|
500 | rx_cache_free (cache, |
---|
501 | sizeof (struct rx_super_edge), |
---|
502 | (char *)tc); |
---|
503 | tc = tct; |
---|
504 | } |
---|
505 | } |
---|
506 | |
---|
507 | if (it->contents->superstate == it) |
---|
508 | it->contents->superstate = 0; |
---|
509 | release_superset_low (cache, it->contents); |
---|
510 | rx_cache_free (cache, |
---|
511 | (sizeof (struct rx_superstate) |
---|
512 | + cache->local_cset_size * sizeof (struct rx_inx)), |
---|
513 | (char *)it); |
---|
514 | --cache->superstates; |
---|
515 | return 1; |
---|
516 | } |
---|
517 | |
---|
518 | |
---|
519 | #ifdef __STDC__ |
---|
520 | static char * |
---|
521 | rx_cache_malloc_or_get (struct rx_cache * cache, int size) |
---|
522 | #else |
---|
523 | static char * |
---|
524 | rx_cache_malloc_or_get (cache, size) |
---|
525 | struct rx_cache * cache; |
---|
526 | int size; |
---|
527 | #endif |
---|
528 | { |
---|
529 | while ( (cache->bytes_used + size > cache->bytes_allowed) |
---|
530 | && rx_really_free_superstate (cache)) |
---|
531 | ; |
---|
532 | |
---|
533 | return rx_cache_malloc (cache, size); |
---|
534 | } |
---|
535 | |
---|
536 | |
---|
537 | |
---|
538 | #ifdef __STDC__ |
---|
539 | static int |
---|
540 | supersetcmp (void * va, void * vb) |
---|
541 | #else |
---|
542 | static int |
---|
543 | supersetcmp (va, vb) |
---|
544 | void * va; |
---|
545 | void * vb; |
---|
546 | #endif |
---|
547 | { |
---|
548 | struct rx_superset * a = (struct rx_superset *)va; |
---|
549 | struct rx_superset * b = (struct rx_superset *)vb; |
---|
550 | return ( (a == b) |
---|
551 | || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr))); |
---|
552 | } |
---|
553 | |
---|
554 | #define rx_abs(A) (((A) > 0) ? (A) : -(A)) |
---|
555 | |
---|
556 | |
---|
557 | #ifdef __STDC__ |
---|
558 | static struct rx_hash_item * |
---|
559 | superset_allocator (struct rx_hash_rules * rules, void * val) |
---|
560 | #else |
---|
561 | static struct rx_hash_item * |
---|
562 | superset_allocator (rules, val) |
---|
563 | struct rx_hash_rules * rules; |
---|
564 | void * val; |
---|
565 | #endif |
---|
566 | { |
---|
567 | struct rx_cache * cache; |
---|
568 | struct rx_superset * template; |
---|
569 | struct rx_superset * newset; |
---|
570 | |
---|
571 | cache = ((struct rx_cache *) |
---|
572 | ((char *)rules |
---|
573 | - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules))); |
---|
574 | template = (struct rx_superset *)val; |
---|
575 | newset = ((struct rx_superset *) |
---|
576 | rx_cache_malloc (cache, sizeof (*template))); |
---|
577 | if (!newset) |
---|
578 | return 0; |
---|
579 | { |
---|
580 | int cdrfinal; |
---|
581 | int cdredges; |
---|
582 | |
---|
583 | cdrfinal = (template->cdr |
---|
584 | ? template->cdr->is_final |
---|
585 | : 0); |
---|
586 | cdredges = (template->cdr |
---|
587 | ? template->cdr->has_cset_edges |
---|
588 | : 0); |
---|
589 | |
---|
590 | newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal) |
---|
591 | ? template->car->is_final |
---|
592 | : template->cdr->is_final); |
---|
593 | newset->has_cset_edges = (template->car->has_cset_edges || cdredges); |
---|
594 | } |
---|
595 | newset->refs = 0; |
---|
596 | newset->id = template->id; |
---|
597 | newset->car = template->car; |
---|
598 | newset->cdr = template->cdr; |
---|
599 | rx_protect_superset (rx, template->cdr); |
---|
600 | newset->superstate = 0; |
---|
601 | newset->starts_for = 0; |
---|
602 | newset->hash_item.data = (void *)newset; |
---|
603 | newset->hash_item.binding = 0; |
---|
604 | return &newset->hash_item; |
---|
605 | } |
---|
606 | |
---|
607 | #ifdef __STDC__ |
---|
608 | static struct rx_hash * |
---|
609 | super_hash_allocator (struct rx_hash_rules * rules) |
---|
610 | #else |
---|
611 | static struct rx_hash * |
---|
612 | super_hash_allocator (rules) |
---|
613 | struct rx_hash_rules * rules; |
---|
614 | #endif |
---|
615 | { |
---|
616 | struct rx_cache * cache; |
---|
617 | cache = ((struct rx_cache *) |
---|
618 | ((char *)rules |
---|
619 | - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules))); |
---|
620 | return ((struct rx_hash *) |
---|
621 | rx_cache_malloc (cache, sizeof (struct rx_hash))); |
---|
622 | } |
---|
623 | |
---|
624 | |
---|
625 | #ifdef __STDC__ |
---|
626 | static void |
---|
627 | super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules) |
---|
628 | #else |
---|
629 | static void |
---|
630 | super_hash_liberator (hash, rules) |
---|
631 | struct rx_hash * hash; |
---|
632 | struct rx_hash_rules * rules; |
---|
633 | #endif |
---|
634 | { |
---|
635 | struct rx_cache * cache |
---|
636 | = ((struct rx_cache *) |
---|
637 | (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules)); |
---|
638 | rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash); |
---|
639 | } |
---|
640 | |
---|
641 | #ifdef __STDC__ |
---|
642 | static void |
---|
643 | superset_hash_item_liberator (struct rx_hash_item * it, |
---|
644 | struct rx_hash_rules * rules) |
---|
645 | #else |
---|
646 | static void |
---|
647 | superset_hash_item_liberator (it, rules) |
---|
648 | struct rx_hash_item * it; |
---|
649 | struct rx_hash_rules * rules; |
---|
650 | #endif |
---|
651 | { |
---|
652 | } |
---|
653 | |
---|
654 | int rx_cache_bound = 3; |
---|
655 | static int rx_default_cache_got = 0; |
---|
656 | |
---|
657 | static struct rx_cache default_cache = |
---|
658 | { |
---|
659 | { |
---|
660 | supersetcmp, |
---|
661 | super_hash_allocator, |
---|
662 | super_hash_liberator, |
---|
663 | superset_allocator, |
---|
664 | superset_hash_item_liberator, |
---|
665 | }, |
---|
666 | 0, |
---|
667 | 0, |
---|
668 | 0, |
---|
669 | 0, |
---|
670 | 0, |
---|
671 | 0, |
---|
672 | 0, |
---|
673 | RX_DEFAULT_DFA_CACHE_SIZE, |
---|
674 | 0, |
---|
675 | 256, |
---|
676 | rx_id_instruction_table, |
---|
677 | { |
---|
678 | 0, |
---|
679 | 0, |
---|
680 | 0, |
---|
681 | 0, |
---|
682 | 0 |
---|
683 | } |
---|
684 | }; |
---|
685 | struct rx_cache * rx_default_cache = &default_cache; |
---|
686 | |
---|
687 | /* This adds an element to a superstate set. These sets are lists, such |
---|
688 | * that lists with == elements are ==. The empty set is returned by |
---|
689 | * superset_cons (rx, 0, 0) and is NOT equivelent to |
---|
690 | * (struct rx_superset)0. |
---|
691 | */ |
---|
692 | |
---|
693 | #ifdef __STDC__ |
---|
694 | struct rx_superset * |
---|
695 | rx_superset_cons (struct rx * rx, |
---|
696 | struct rx_nfa_state *car, struct rx_superset *cdr) |
---|
697 | #else |
---|
698 | struct rx_superset * |
---|
699 | rx_superset_cons (rx, car, cdr) |
---|
700 | struct rx * rx; |
---|
701 | struct rx_nfa_state *car; |
---|
702 | struct rx_superset *cdr; |
---|
703 | #endif |
---|
704 | { |
---|
705 | struct rx_cache * cache = rx->cache; |
---|
706 | if (!car && !cdr) |
---|
707 | { |
---|
708 | if (!cache->empty_superset) |
---|
709 | { |
---|
710 | cache->empty_superset |
---|
711 | = ((struct rx_superset *) |
---|
712 | rx_cache_malloc (cache, sizeof (struct rx_superset))); |
---|
713 | if (!cache->empty_superset) |
---|
714 | return 0; |
---|
715 | rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset)); |
---|
716 | cache->empty_superset->refs = 1000; |
---|
717 | } |
---|
718 | return cache->empty_superset; |
---|
719 | } |
---|
720 | { |
---|
721 | struct rx_superset template; |
---|
722 | struct rx_hash_item * hit; |
---|
723 | template.car = car; |
---|
724 | template.cdr = cdr; |
---|
725 | template.id = rx->rx_id; |
---|
726 | rx_protect_superset (rx, template.cdr); |
---|
727 | hit = rx_hash_store (&cache->superset_table, |
---|
728 | (unsigned long)car ^ car->id ^ (unsigned long)cdr, |
---|
729 | (void *)&template, |
---|
730 | &cache->superset_hash_rules); |
---|
731 | rx_protect_superset (rx, template.cdr); |
---|
732 | return (hit |
---|
733 | ? (struct rx_superset *)hit->data |
---|
734 | : 0); |
---|
735 | } |
---|
736 | } |
---|
737 | |
---|
738 | /* This computes a union of two NFA state sets. The sets do not have the |
---|
739 | * same representation though. One is a RX_SUPERSET structure (part |
---|
740 | * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA). |
---|
741 | */ |
---|
742 | |
---|
743 | #ifdef __STDC__ |
---|
744 | struct rx_superset * |
---|
745 | rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl) |
---|
746 | #else |
---|
747 | struct rx_superset * |
---|
748 | rx_superstate_eclosure_union (rx, set, ecl) |
---|
749 | struct rx * rx; |
---|
750 | struct rx_superset *set; |
---|
751 | struct rx_nfa_state_set *ecl; |
---|
752 | #endif |
---|
753 | { |
---|
754 | if (!ecl) |
---|
755 | return set; |
---|
756 | |
---|
757 | if (!set->car) |
---|
758 | return rx_superset_cons (rx, ecl->car, |
---|
759 | rx_superstate_eclosure_union (rx, set, ecl->cdr)); |
---|
760 | if (set->car == ecl->car) |
---|
761 | return rx_superstate_eclosure_union (rx, set, ecl->cdr); |
---|
762 | |
---|
763 | { |
---|
764 | struct rx_superset * tail; |
---|
765 | struct rx_nfa_state * first; |
---|
766 | |
---|
767 | if (set->car->id < ecl->car->id) |
---|
768 | { |
---|
769 | tail = rx_superstate_eclosure_union (rx, set->cdr, ecl); |
---|
770 | first = set->car; |
---|
771 | } |
---|
772 | else |
---|
773 | { |
---|
774 | tail = rx_superstate_eclosure_union (rx, set, ecl->cdr); |
---|
775 | first = ecl->car; |
---|
776 | } |
---|
777 | if (!tail) |
---|
778 | return 0; |
---|
779 | else |
---|
780 | { |
---|
781 | struct rx_superset * answer; |
---|
782 | answer = rx_superset_cons (rx, first, tail); |
---|
783 | if (!answer) |
---|
784 | { |
---|
785 | rx_protect_superset (rx, tail); |
---|
786 | rx_release_superset (rx, tail); |
---|
787 | return 0; |
---|
788 | } |
---|
789 | else |
---|
790 | return answer; |
---|
791 | } |
---|
792 | } |
---|
793 | } |
---|
794 | |
---|
795 | |
---|
796 | |
---|
797 | |
---|
798 | /* |
---|
799 | * This makes sure that a list of rx_distinct_futures contains |
---|
800 | * a future for each possible set of side effects in the eclosure |
---|
801 | * of a given state. This is some of the work of filling in a |
---|
802 | * superstate transition. |
---|
803 | */ |
---|
804 | |
---|
805 | #ifdef __STDC__ |
---|
806 | static struct rx_distinct_future * |
---|
807 | include_futures (struct rx *rx, |
---|
808 | struct rx_distinct_future *df, struct rx_nfa_state |
---|
809 | *state, struct rx_superstate *superstate) |
---|
810 | #else |
---|
811 | static struct rx_distinct_future * |
---|
812 | include_futures (rx, df, state, superstate) |
---|
813 | struct rx *rx; |
---|
814 | struct rx_distinct_future *df; |
---|
815 | struct rx_nfa_state *state; |
---|
816 | struct rx_superstate *superstate; |
---|
817 | #endif |
---|
818 | { |
---|
819 | struct rx_possible_future *future; |
---|
820 | struct rx_cache * cache = rx->cache; |
---|
821 | for (future = rx_state_possible_futures (rx, state); future; future = future->next) |
---|
822 | { |
---|
823 | struct rx_distinct_future *dfp; |
---|
824 | struct rx_distinct_future *insert_before = 0; |
---|
825 | if (df) |
---|
826 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; |
---|
827 | for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0]) |
---|
828 | if (dfp->effects == future->effects) |
---|
829 | break; |
---|
830 | else |
---|
831 | { |
---|
832 | int order = rx->se_list_cmp (rx, dfp->effects, future->effects); |
---|
833 | if (order > 0) |
---|
834 | { |
---|
835 | insert_before = dfp; |
---|
836 | dfp = 0; |
---|
837 | break; |
---|
838 | } |
---|
839 | } |
---|
840 | if (df) |
---|
841 | df->next_same_super_edge[1]->next_same_super_edge[0] = df; |
---|
842 | if (!dfp) |
---|
843 | { |
---|
844 | dfp |
---|
845 | = ((struct rx_distinct_future *) |
---|
846 | rx_cache_malloc (cache, |
---|
847 | sizeof (struct rx_distinct_future))); |
---|
848 | if (!dfp) |
---|
849 | return 0; |
---|
850 | if (!df) |
---|
851 | { |
---|
852 | df = insert_before = dfp; |
---|
853 | df->next_same_super_edge[0] = df->next_same_super_edge[1] = df; |
---|
854 | } |
---|
855 | else if (!insert_before) |
---|
856 | insert_before = df; |
---|
857 | else if (insert_before == df) |
---|
858 | df = dfp; |
---|
859 | |
---|
860 | dfp->next_same_super_edge[0] = insert_before; |
---|
861 | dfp->next_same_super_edge[1] |
---|
862 | = insert_before->next_same_super_edge[1]; |
---|
863 | dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp; |
---|
864 | dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp; |
---|
865 | dfp->next_same_dest = dfp->prev_same_dest = dfp; |
---|
866 | dfp->future = 0; |
---|
867 | dfp->present = superstate; |
---|
868 | dfp->future_frame.inx = rx->instruction_table[rx_cache_miss]; |
---|
869 | dfp->future_frame.data = 0; |
---|
870 | dfp->future_frame.data_2 = (void *) dfp; |
---|
871 | dfp->side_effects_frame.inx |
---|
872 | = rx->instruction_table[rx_do_side_effects]; |
---|
873 | dfp->side_effects_frame.data = 0; |
---|
874 | dfp->side_effects_frame.data_2 = (void *) dfp; |
---|
875 | dfp->effects = future->effects; |
---|
876 | } |
---|
877 | } |
---|
878 | return df; |
---|
879 | } |
---|
880 | |
---|
881 | |
---|
882 | |
---|
883 | /* This constructs a new superstate from its state set. The only |
---|
884 | * complexity here is memory management. |
---|
885 | */ |
---|
886 | #ifdef __STDC__ |
---|
887 | struct rx_superstate * |
---|
888 | rx_superstate (struct rx *rx, |
---|
889 | struct rx_superset *set) |
---|
890 | #else |
---|
891 | struct rx_superstate * |
---|
892 | rx_superstate (rx, set) |
---|
893 | struct rx *rx; |
---|
894 | struct rx_superset *set; |
---|
895 | #endif |
---|
896 | { |
---|
897 | struct rx_cache * cache = rx->cache; |
---|
898 | struct rx_superstate * superstate = 0; |
---|
899 | |
---|
900 | /* Does the superstate already exist in the cache? */ |
---|
901 | if (set->superstate) |
---|
902 | { |
---|
903 | if (set->superstate->rx_id != rx->rx_id) |
---|
904 | { |
---|
905 | /* Aha. It is in the cache, but belongs to a superstate |
---|
906 | * that refers to an NFA that no longer exists. |
---|
907 | * (We know it no longer exists because it was evidently |
---|
908 | * stored in the same region of memory as the current nfa |
---|
909 | * yet it has a different id.) |
---|
910 | */ |
---|
911 | superstate = set->superstate; |
---|
912 | if (!superstate->is_semifree) |
---|
913 | { |
---|
914 | if (cache->lru_superstate == superstate) |
---|
915 | { |
---|
916 | cache->lru_superstate = superstate->next_recyclable; |
---|
917 | if (cache->lru_superstate == superstate) |
---|
918 | cache->lru_superstate = 0; |
---|
919 | } |
---|
920 | { |
---|
921 | superstate->next_recyclable->prev_recyclable |
---|
922 | = superstate->prev_recyclable; |
---|
923 | superstate->prev_recyclable->next_recyclable |
---|
924 | = superstate->next_recyclable; |
---|
925 | if (!cache->semifree_superstate) |
---|
926 | { |
---|
927 | (cache->semifree_superstate |
---|
928 | = superstate->next_recyclable |
---|
929 | = superstate->prev_recyclable |
---|
930 | = superstate); |
---|
931 | } |
---|
932 | else |
---|
933 | { |
---|
934 | superstate->next_recyclable = cache->semifree_superstate; |
---|
935 | superstate->prev_recyclable |
---|
936 | = cache->semifree_superstate->prev_recyclable; |
---|
937 | superstate->next_recyclable->prev_recyclable |
---|
938 | = superstate; |
---|
939 | superstate->prev_recyclable->next_recyclable |
---|
940 | = superstate; |
---|
941 | cache->semifree_superstate = superstate; |
---|
942 | } |
---|
943 | ++cache->semifree_superstates; |
---|
944 | } |
---|
945 | } |
---|
946 | set->superstate = 0; |
---|
947 | goto handle_cache_miss; |
---|
948 | } |
---|
949 | ++cache->hits; |
---|
950 | superstate = set->superstate; |
---|
951 | |
---|
952 | rx_refresh_this_superstate (cache, superstate); |
---|
953 | return superstate; |
---|
954 | } |
---|
955 | |
---|
956 | handle_cache_miss: |
---|
957 | |
---|
958 | /* This point reached only for cache misses. */ |
---|
959 | ++cache->misses; |
---|
960 | #if RX_DEBUG |
---|
961 | if (rx_debug_trace > 1) |
---|
962 | { |
---|
963 | struct rx_superset * setp = set; |
---|
964 | fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set); |
---|
965 | while (setp) |
---|
966 | { |
---|
967 | fprintf (stderr, "%d ", setp->id); |
---|
968 | setp = setp->cdr; |
---|
969 | } |
---|
970 | fprintf (stderr, "(%d)\n", set); |
---|
971 | } |
---|
972 | #endif |
---|
973 | |
---|
974 | { |
---|
975 | int superstate_size; |
---|
976 | superstate_size = ( sizeof (*superstate) |
---|
977 | + ( sizeof (struct rx_inx) |
---|
978 | * rx->local_cset_size)); |
---|
979 | superstate = ((struct rx_superstate *) |
---|
980 | rx_cache_malloc_or_get (cache, superstate_size)); |
---|
981 | ++cache->superstates; |
---|
982 | } |
---|
983 | |
---|
984 | if (!superstate) |
---|
985 | return 0; |
---|
986 | |
---|
987 | if (!cache->lru_superstate) |
---|
988 | (cache->lru_superstate |
---|
989 | = superstate->next_recyclable |
---|
990 | = superstate->prev_recyclable |
---|
991 | = superstate); |
---|
992 | else |
---|
993 | { |
---|
994 | superstate->next_recyclable = cache->lru_superstate; |
---|
995 | superstate->prev_recyclable = cache->lru_superstate->prev_recyclable; |
---|
996 | ( superstate->prev_recyclable->next_recyclable |
---|
997 | = superstate->next_recyclable->prev_recyclable |
---|
998 | = superstate); |
---|
999 | } |
---|
1000 | superstate->rx_id = rx->rx_id; |
---|
1001 | superstate->transition_refs = 0; |
---|
1002 | superstate->locks = 0; |
---|
1003 | superstate->is_semifree = 0; |
---|
1004 | set->superstate = superstate; |
---|
1005 | superstate->contents = set; |
---|
1006 | rx_protect_superset (rx, set); |
---|
1007 | superstate->edges = 0; |
---|
1008 | { |
---|
1009 | int x; |
---|
1010 | /* None of the transitions from this superstate are known yet. */ |
---|
1011 | for (x = 0; x < rx->local_cset_size; ++x) |
---|
1012 | { |
---|
1013 | struct rx_inx * ifr = &superstate->transitions[x]; |
---|
1014 | ifr->inx = rx->instruction_table [rx_cache_miss]; |
---|
1015 | ifr->data = ifr->data_2 = 0; |
---|
1016 | } |
---|
1017 | } |
---|
1018 | return superstate; |
---|
1019 | } |
---|
1020 | |
---|
1021 | |
---|
1022 | /* This computes the destination set of one edge of the superstate NFA. |
---|
1023 | * Note that a RX_DISTINCT_FUTURE is a superstate edge. |
---|
1024 | * Returns 0 on an allocation failure. |
---|
1025 | */ |
---|
1026 | |
---|
1027 | #ifdef __STDC__ |
---|
1028 | static int |
---|
1029 | solve_destination (struct rx *rx, struct rx_distinct_future *df) |
---|
1030 | #else |
---|
1031 | static int |
---|
1032 | solve_destination (rx, df) |
---|
1033 | struct rx *rx; |
---|
1034 | struct rx_distinct_future *df; |
---|
1035 | #endif |
---|
1036 | { |
---|
1037 | struct rx_super_edge *tc = df->edge; |
---|
1038 | struct rx_superset *nfa_state; |
---|
1039 | struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0); |
---|
1040 | struct rx_superset *solution = nil_set; |
---|
1041 | struct rx_superstate *dest; |
---|
1042 | |
---|
1043 | rx_protect_superset (rx, solution); |
---|
1044 | /* Iterate over all NFA states in the state set of this superstate. */ |
---|
1045 | for (nfa_state = df->present->contents; |
---|
1046 | nfa_state->car; |
---|
1047 | nfa_state = nfa_state->cdr) |
---|
1048 | { |
---|
1049 | struct rx_nfa_edge *e; |
---|
1050 | /* Iterate over all edges of each NFA state. */ |
---|
1051 | for (e = nfa_state->car->edges; e; e = e->next) |
---|
1052 | /* If we find an edge that is labeled with |
---|
1053 | * the characters we are solving for..... |
---|
1054 | */ |
---|
1055 | if ((e->type == ne_cset) |
---|
1056 | && rx_bitset_is_subset (rx->local_cset_size, |
---|
1057 | tc->cset, |
---|
1058 | e->params.cset)) |
---|
1059 | { |
---|
1060 | struct rx_nfa_state *n = e->dest; |
---|
1061 | struct rx_possible_future *pf; |
---|
1062 | /* ....search the partial epsilon closures of the destination |
---|
1063 | * of that edge for a path that involves the same set of |
---|
1064 | * side effects we are solving for. |
---|
1065 | * If we find such a RX_POSSIBLE_FUTURE, we add members to the |
---|
1066 | * stateset we are computing. |
---|
1067 | */ |
---|
1068 | for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next) |
---|
1069 | if (pf->effects == df->effects) |
---|
1070 | { |
---|
1071 | struct rx_superset * old_sol; |
---|
1072 | old_sol = solution; |
---|
1073 | solution = rx_superstate_eclosure_union (rx, solution, |
---|
1074 | pf->destset); |
---|
1075 | if (!solution) |
---|
1076 | return 0; |
---|
1077 | rx_protect_superset (rx, solution); |
---|
1078 | rx_release_superset (rx, old_sol); |
---|
1079 | } |
---|
1080 | } |
---|
1081 | } |
---|
1082 | /* It is possible that the RX_DISTINCT_FUTURE we are working on has |
---|
1083 | * the empty set of NFA states as its definition. In that case, this |
---|
1084 | * is a failure point. |
---|
1085 | */ |
---|
1086 | if (solution == nil_set) |
---|
1087 | { |
---|
1088 | df->future_frame.inx = (void *) rx_backtrack; |
---|
1089 | df->future_frame.data = 0; |
---|
1090 | df->future_frame.data_2 = 0; |
---|
1091 | return 1; |
---|
1092 | } |
---|
1093 | dest = rx_superstate (rx, solution); |
---|
1094 | rx_release_superset (rx, solution); |
---|
1095 | if (!dest) |
---|
1096 | return 0; |
---|
1097 | |
---|
1098 | { |
---|
1099 | struct rx_distinct_future *dft; |
---|
1100 | dft = df; |
---|
1101 | df->prev_same_dest->next_same_dest = 0; |
---|
1102 | while (dft) |
---|
1103 | { |
---|
1104 | dft->future = dest; |
---|
1105 | dft->future_frame.inx = rx->instruction_table[rx_next_char]; |
---|
1106 | dft->future_frame.data = (void *) dest->transitions; |
---|
1107 | dft->future_frame.data_2 = (void *) dest->contents->is_final; |
---|
1108 | dft = dft->next_same_dest; |
---|
1109 | } |
---|
1110 | df->prev_same_dest->next_same_dest = df; |
---|
1111 | } |
---|
1112 | if (!dest->transition_refs) |
---|
1113 | dest->transition_refs = df; |
---|
1114 | else |
---|
1115 | { |
---|
1116 | struct rx_distinct_future *dft; |
---|
1117 | dft = dest->transition_refs->next_same_dest; |
---|
1118 | dest->transition_refs->next_same_dest = df->next_same_dest; |
---|
1119 | df->next_same_dest->prev_same_dest = dest->transition_refs; |
---|
1120 | df->next_same_dest = dft; |
---|
1121 | dft->prev_same_dest = df; |
---|
1122 | } |
---|
1123 | return 1; |
---|
1124 | } |
---|
1125 | |
---|
1126 | |
---|
1127 | /* This takes a superstate and a character, and computes some edges |
---|
1128 | * from the superstate NFA. In particular, this computes all edges |
---|
1129 | * that lead from SUPERSTATE given CHR. This function also |
---|
1130 | * computes the set of characters that share this edge set. |
---|
1131 | * This returns 0 on allocation error. |
---|
1132 | * The character set and list of edges are returned through |
---|
1133 | * the paramters CSETOUT and DFOUT. |
---|
1134 | */ |
---|
1135 | |
---|
1136 | #ifdef __STDC__ |
---|
1137 | static int |
---|
1138 | compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout, |
---|
1139 | rx_Bitset csetout, struct rx_superstate *superstate, |
---|
1140 | unsigned char chr) |
---|
1141 | #else |
---|
1142 | static int |
---|
1143 | compute_super_edge (rx, dfout, csetout, superstate, chr) |
---|
1144 | struct rx *rx; |
---|
1145 | struct rx_distinct_future **dfout; |
---|
1146 | rx_Bitset csetout; |
---|
1147 | struct rx_superstate *superstate; |
---|
1148 | unsigned char chr; |
---|
1149 | #endif |
---|
1150 | { |
---|
1151 | struct rx_superset *stateset = superstate->contents; |
---|
1152 | |
---|
1153 | /* To compute the set of characters that share edges with CHR, |
---|
1154 | * we start with the full character set, and subtract. |
---|
1155 | */ |
---|
1156 | rx_bitset_universe (rx->local_cset_size, csetout); |
---|
1157 | *dfout = 0; |
---|
1158 | |
---|
1159 | /* Iterate over the NFA states in the superstate state-set. */ |
---|
1160 | while (stateset->car) |
---|
1161 | { |
---|
1162 | struct rx_nfa_edge *e; |
---|
1163 | for (e = stateset->car->edges; e; e = e->next) |
---|
1164 | if (e->type == ne_cset) |
---|
1165 | { |
---|
1166 | if (!RX_bitset_member (e->params.cset, chr)) |
---|
1167 | /* An edge that doesn't apply at least tells us some characters |
---|
1168 | * that don't share the same edge set as CHR. |
---|
1169 | */ |
---|
1170 | rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset); |
---|
1171 | else |
---|
1172 | { |
---|
1173 | /* If we find an NFA edge that applies, we make sure there |
---|
1174 | * are corresponding edges in the superstate NFA. |
---|
1175 | */ |
---|
1176 | { |
---|
1177 | struct rx_distinct_future * saved; |
---|
1178 | saved = *dfout; |
---|
1179 | *dfout = include_futures (rx, *dfout, e->dest, superstate); |
---|
1180 | if (!*dfout) |
---|
1181 | { |
---|
1182 | struct rx_distinct_future * df; |
---|
1183 | df = saved; |
---|
1184 | if (df) |
---|
1185 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; |
---|
1186 | while (df) |
---|
1187 | { |
---|
1188 | struct rx_distinct_future *dft; |
---|
1189 | dft = df; |
---|
1190 | df = df->next_same_super_edge[0]; |
---|
1191 | |
---|
1192 | if (dft->future && dft->future->transition_refs == dft) |
---|
1193 | { |
---|
1194 | dft->future->transition_refs = dft->next_same_dest; |
---|
1195 | if (dft->future->transition_refs == dft) |
---|
1196 | dft->future->transition_refs = 0; |
---|
1197 | } |
---|
1198 | dft->next_same_dest->prev_same_dest = dft->prev_same_dest; |
---|
1199 | dft->prev_same_dest->next_same_dest = dft->next_same_dest; |
---|
1200 | rx_cache_free (rx->cache, sizeof (*dft), (char *)dft); |
---|
1201 | } |
---|
1202 | return 0; |
---|
1203 | } |
---|
1204 | } |
---|
1205 | /* We also trim the character set a bit. */ |
---|
1206 | rx_bitset_intersection (rx->local_cset_size, |
---|
1207 | csetout, e->params.cset); |
---|
1208 | } |
---|
1209 | } |
---|
1210 | stateset = stateset->cdr; |
---|
1211 | } |
---|
1212 | return 1; |
---|
1213 | } |
---|
1214 | |
---|
1215 | |
---|
1216 | /* This is a constructor for RX_SUPER_EDGE structures. These are |
---|
1217 | * wrappers for lists of superstate NFA edges that share character sets labels. |
---|
1218 | * If a transition class contains more than one rx_distinct_future (superstate |
---|
1219 | * edge), then it represents a non-determinism in the superstate NFA. |
---|
1220 | */ |
---|
1221 | |
---|
1222 | #ifdef __STDC__ |
---|
1223 | static struct rx_super_edge * |
---|
1224 | rx_super_edge (struct rx *rx, |
---|
1225 | struct rx_superstate *super, rx_Bitset cset, |
---|
1226 | struct rx_distinct_future *df) |
---|
1227 | #else |
---|
1228 | static struct rx_super_edge * |
---|
1229 | rx_super_edge (rx, super, cset, df) |
---|
1230 | struct rx *rx; |
---|
1231 | struct rx_superstate *super; |
---|
1232 | rx_Bitset cset; |
---|
1233 | struct rx_distinct_future *df; |
---|
1234 | #endif |
---|
1235 | { |
---|
1236 | struct rx_super_edge *tc; |
---|
1237 | int tc_size; |
---|
1238 | |
---|
1239 | tc_size = ( sizeof (struct rx_super_edge) |
---|
1240 | + rx_sizeof_bitset (rx->local_cset_size)); |
---|
1241 | |
---|
1242 | tc = ((struct rx_super_edge *) |
---|
1243 | rx_cache_malloc (rx->cache, tc_size)); |
---|
1244 | |
---|
1245 | if (!tc) |
---|
1246 | return 0; |
---|
1247 | |
---|
1248 | tc->next = super->edges; |
---|
1249 | super->edges = tc; |
---|
1250 | tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point]; |
---|
1251 | tc->rx_backtrack_frame.data = 0; |
---|
1252 | tc->rx_backtrack_frame.data_2 = (void *) tc; |
---|
1253 | tc->options = df; |
---|
1254 | tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc)); |
---|
1255 | rx_bitset_assign (rx->local_cset_size, tc->cset, cset); |
---|
1256 | if (df) |
---|
1257 | { |
---|
1258 | struct rx_distinct_future * dfp = df; |
---|
1259 | df->next_same_super_edge[1]->next_same_super_edge[0] = 0; |
---|
1260 | while (dfp) |
---|
1261 | { |
---|
1262 | dfp->edge = tc; |
---|
1263 | dfp = dfp->next_same_super_edge[0]; |
---|
1264 | } |
---|
1265 | df->next_same_super_edge[1]->next_same_super_edge[0] = df; |
---|
1266 | } |
---|
1267 | return tc; |
---|
1268 | } |
---|
1269 | |
---|
1270 | |
---|
1271 | /* There are three kinds of cache miss. The first occurs when a |
---|
1272 | * transition is taken that has never been computed during the |
---|
1273 | * lifetime of the source superstate. That cache miss is handled by |
---|
1274 | * calling COMPUTE_SUPER_EDGE. The second kind of cache miss |
---|
1275 | * occurs when the destination superstate of a transition doesn't |
---|
1276 | * exist. SOLVE_DESTINATION is used to construct the destination superstate. |
---|
1277 | * Finally, the third kind of cache miss occurs when the destination |
---|
1278 | * superstate of a transition is in a `semi-free state'. That case is |
---|
1279 | * handled by UNFREE_SUPERSTATE. |
---|
1280 | * |
---|
1281 | * The function of HANDLE_CACHE_MISS is to figure out which of these |
---|
1282 | * cases applies. |
---|
1283 | */ |
---|
1284 | |
---|
1285 | #ifdef __STDC__ |
---|
1286 | static void |
---|
1287 | install_partial_transition (struct rx_superstate *super, |
---|
1288 | struct rx_inx *answer, |
---|
1289 | RX_subset set, int offset) |
---|
1290 | #else |
---|
1291 | static void |
---|
1292 | install_partial_transition (super, answer, set, offset) |
---|
1293 | struct rx_superstate *super; |
---|
1294 | struct rx_inx *answer; |
---|
1295 | RX_subset set; |
---|
1296 | int offset; |
---|
1297 | #endif |
---|
1298 | { |
---|
1299 | int start = offset; |
---|
1300 | int end = start + 32; |
---|
1301 | RX_subset pos = 1; |
---|
1302 | struct rx_inx * transitions = super->transitions; |
---|
1303 | |
---|
1304 | while (start < end) |
---|
1305 | { |
---|
1306 | if (set & pos) |
---|
1307 | transitions[start] = *answer; |
---|
1308 | pos <<= 1; |
---|
1309 | ++start; |
---|
1310 | } |
---|
1311 | } |
---|
1312 | |
---|
1313 | |
---|
1314 | #ifdef __STDC__ |
---|
1315 | struct rx_inx * |
---|
1316 | rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data) |
---|
1317 | #else |
---|
1318 | struct rx_inx * |
---|
1319 | rx_handle_cache_miss (rx, super, chr, data) |
---|
1320 | struct rx *rx; |
---|
1321 | struct rx_superstate *super; |
---|
1322 | unsigned char chr; |
---|
1323 | void *data; |
---|
1324 | #endif |
---|
1325 | { |
---|
1326 | int offset = chr / RX_subset_bits; |
---|
1327 | struct rx_distinct_future *df = data; |
---|
1328 | |
---|
1329 | if (!df) /* must be the shared_cache_miss_frame */ |
---|
1330 | { |
---|
1331 | /* Perhaps this is just a transition waiting to be filled. */ |
---|
1332 | struct rx_super_edge *tc; |
---|
1333 | RX_subset mask = rx_subset_singletons [chr % RX_subset_bits]; |
---|
1334 | |
---|
1335 | for (tc = super->edges; tc; tc = tc->next) |
---|
1336 | if (tc->cset[offset] & mask) |
---|
1337 | { |
---|
1338 | struct rx_inx * answer; |
---|
1339 | df = tc->options; |
---|
1340 | answer = ((tc->options->next_same_super_edge[0] != tc->options) |
---|
1341 | ? &tc->rx_backtrack_frame |
---|
1342 | : (df->effects |
---|
1343 | ? &df->side_effects_frame |
---|
1344 | : &df->future_frame)); |
---|
1345 | install_partial_transition (super, answer, |
---|
1346 | tc->cset [offset], offset * 32); |
---|
1347 | return answer; |
---|
1348 | } |
---|
1349 | /* Otherwise, it's a flushed or newly encountered edge. */ |
---|
1350 | { |
---|
1351 | char cset_space[1024]; /* this limit is far from unreasonable */ |
---|
1352 | rx_Bitset trcset; |
---|
1353 | struct rx_inx *answer; |
---|
1354 | |
---|
1355 | if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space)) |
---|
1356 | return 0; /* If the arbitrary limit is hit, always fail */ |
---|
1357 | /* cleanly. */ |
---|
1358 | trcset = (rx_Bitset)cset_space; |
---|
1359 | rx_lock_superstate (rx, super); |
---|
1360 | if (!compute_super_edge (rx, &df, trcset, super, chr)) |
---|
1361 | { |
---|
1362 | rx_unlock_superstate (rx, super); |
---|
1363 | return 0; |
---|
1364 | } |
---|
1365 | if (!df) /* We just computed the fail transition. */ |
---|
1366 | { |
---|
1367 | static struct rx_inx |
---|
1368 | shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 }; |
---|
1369 | answer = &shared_fail_frame; |
---|
1370 | } |
---|
1371 | else |
---|
1372 | { |
---|
1373 | tc = rx_super_edge (rx, super, trcset, df); |
---|
1374 | if (!tc) |
---|
1375 | { |
---|
1376 | rx_unlock_superstate (rx, super); |
---|
1377 | return 0; |
---|
1378 | } |
---|
1379 | answer = ((tc->options->next_same_super_edge[0] != tc->options) |
---|
1380 | ? &tc->rx_backtrack_frame |
---|
1381 | : (df->effects |
---|
1382 | ? &df->side_effects_frame |
---|
1383 | : &df->future_frame)); |
---|
1384 | } |
---|
1385 | install_partial_transition (super, answer, |
---|
1386 | trcset[offset], offset * 32); |
---|
1387 | rx_unlock_superstate (rx, super); |
---|
1388 | return answer; |
---|
1389 | } |
---|
1390 | } |
---|
1391 | else if (df->future) /* A cache miss on an edge with a future? Must be |
---|
1392 | * a semi-free destination. */ |
---|
1393 | { |
---|
1394 | if (df->future->is_semifree) |
---|
1395 | refresh_semifree_superstate (rx->cache, df->future); |
---|
1396 | return &df->future_frame; |
---|
1397 | } |
---|
1398 | else |
---|
1399 | /* no future superstate on an existing edge */ |
---|
1400 | { |
---|
1401 | rx_lock_superstate (rx, super); |
---|
1402 | if (!solve_destination (rx, df)) |
---|
1403 | { |
---|
1404 | rx_unlock_superstate (rx, super); |
---|
1405 | return 0; |
---|
1406 | } |
---|
1407 | if (!df->effects |
---|
1408 | && (df->edge->options->next_same_super_edge[0] == df->edge->options)) |
---|
1409 | install_partial_transition (super, &df->future_frame, |
---|
1410 | df->edge->cset[offset], offset * 32); |
---|
1411 | rx_unlock_superstate (rx, super); |
---|
1412 | return &df->future_frame; |
---|
1413 | } |
---|
1414 | } |
---|
1415 | |
---|
1416 | |
---|