1 | /* dfa - DFA construction routines */ |
---|
2 | |
---|
3 | /*- |
---|
4 | * Copyright (c) 1990 The Regents of the University of California. |
---|
5 | * All rights reserved. |
---|
6 | * |
---|
7 | * This code is derived from software contributed to Berkeley by |
---|
8 | * Vern Paxson. |
---|
9 | * |
---|
10 | * The United States Government has rights in this work pursuant |
---|
11 | * to contract no. DE-AC03-76SF00098 between the United States |
---|
12 | * Department of Energy and the University of California. |
---|
13 | * |
---|
14 | * Redistribution and use in source and binary forms with or without |
---|
15 | * modification are permitted provided that: (1) source distributions retain |
---|
16 | * this entire copyright notice and comment, and (2) distributions including |
---|
17 | * binaries display the following acknowledgement: ``This product includes |
---|
18 | * software developed by the University of California, Berkeley and its |
---|
19 | * contributors'' in the documentation or other materials provided with the |
---|
20 | * distribution and in all advertising materials mentioning features or use |
---|
21 | * of this software. Neither the name of the University nor the names of |
---|
22 | * its contributors may be used to endorse or promote products derived from |
---|
23 | * this software without specific prior written permission. |
---|
24 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED |
---|
25 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
---|
26 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
---|
27 | */ |
---|
28 | |
---|
29 | /* $Header: /afs/dev.mit.edu/source/repository/third/flex/dfa.c,v 1.1.1.1 2001-04-10 17:05:14 ghudson Exp $ */ |
---|
30 | |
---|
31 | #include "flexdef.h" |
---|
32 | |
---|
33 | |
---|
34 | /* declare functions that have forward references */ |
---|
35 | |
---|
36 | void dump_associated_rules PROTO((FILE*, int)); |
---|
37 | void dump_transitions PROTO((FILE*, int[])); |
---|
38 | void sympartition PROTO((int[], int, int[], int[])); |
---|
39 | int symfollowset PROTO((int[], int, int, int[])); |
---|
40 | |
---|
41 | |
---|
42 | /* check_for_backing_up - check a DFA state for backing up |
---|
43 | * |
---|
44 | * synopsis |
---|
45 | * void check_for_backing_up( int ds, int state[numecs] ); |
---|
46 | * |
---|
47 | * ds is the number of the state to check and state[] is its out-transitions, |
---|
48 | * indexed by equivalence class. |
---|
49 | */ |
---|
50 | |
---|
51 | void check_for_backing_up( ds, state ) |
---|
52 | int ds; |
---|
53 | int state[]; |
---|
54 | { |
---|
55 | if ( (reject && ! dfaacc[ds].dfaacc_set) || |
---|
56 | (! reject && ! dfaacc[ds].dfaacc_state) ) |
---|
57 | { /* state is non-accepting */ |
---|
58 | ++num_backing_up; |
---|
59 | |
---|
60 | if ( backing_up_report ) |
---|
61 | { |
---|
62 | fprintf( backing_up_file, |
---|
63 | _( "State #%d is non-accepting -\n" ), ds ); |
---|
64 | |
---|
65 | /* identify the state */ |
---|
66 | dump_associated_rules( backing_up_file, ds ); |
---|
67 | |
---|
68 | /* Now identify it further using the out- and |
---|
69 | * jam-transitions. |
---|
70 | */ |
---|
71 | dump_transitions( backing_up_file, state ); |
---|
72 | |
---|
73 | putc( '\n', backing_up_file ); |
---|
74 | } |
---|
75 | } |
---|
76 | } |
---|
77 | |
---|
78 | |
---|
79 | /* check_trailing_context - check to see if NFA state set constitutes |
---|
80 | * "dangerous" trailing context |
---|
81 | * |
---|
82 | * synopsis |
---|
83 | * void check_trailing_context( int nfa_states[num_states+1], int num_states, |
---|
84 | * int accset[nacc+1], int nacc ); |
---|
85 | * |
---|
86 | * NOTES |
---|
87 | * Trailing context is "dangerous" if both the head and the trailing |
---|
88 | * part are of variable size \and/ there's a DFA state which contains |
---|
89 | * both an accepting state for the head part of the rule and NFA states |
---|
90 | * which occur after the beginning of the trailing context. |
---|
91 | * |
---|
92 | * When such a rule is matched, it's impossible to tell if having been |
---|
93 | * in the DFA state indicates the beginning of the trailing context or |
---|
94 | * further-along scanning of the pattern. In these cases, a warning |
---|
95 | * message is issued. |
---|
96 | * |
---|
97 | * nfa_states[1 .. num_states] is the list of NFA states in the DFA. |
---|
98 | * accset[1 .. nacc] is the list of accepting numbers for the DFA state. |
---|
99 | */ |
---|
100 | |
---|
101 | void check_trailing_context( nfa_states, num_states, accset, nacc ) |
---|
102 | int *nfa_states, num_states; |
---|
103 | int *accset; |
---|
104 | int nacc; |
---|
105 | { |
---|
106 | register int i, j; |
---|
107 | |
---|
108 | for ( i = 1; i <= num_states; ++i ) |
---|
109 | { |
---|
110 | int ns = nfa_states[i]; |
---|
111 | register int type = state_type[ns]; |
---|
112 | register int ar = assoc_rule[ns]; |
---|
113 | |
---|
114 | if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) |
---|
115 | { /* do nothing */ |
---|
116 | } |
---|
117 | |
---|
118 | else if ( type == STATE_TRAILING_CONTEXT ) |
---|
119 | { |
---|
120 | /* Potential trouble. Scan set of accepting numbers |
---|
121 | * for the one marking the end of the "head". We |
---|
122 | * assume that this looping will be fairly cheap |
---|
123 | * since it's rare that an accepting number set |
---|
124 | * is large. |
---|
125 | */ |
---|
126 | for ( j = 1; j <= nacc; ++j ) |
---|
127 | if ( accset[j] & YY_TRAILING_HEAD_MASK ) |
---|
128 | { |
---|
129 | line_warning( |
---|
130 | _( "dangerous trailing context" ), |
---|
131 | rule_linenum[ar] ); |
---|
132 | return; |
---|
133 | } |
---|
134 | } |
---|
135 | } |
---|
136 | } |
---|
137 | |
---|
138 | |
---|
139 | /* dump_associated_rules - list the rules associated with a DFA state |
---|
140 | * |
---|
141 | * Goes through the set of NFA states associated with the DFA and |
---|
142 | * extracts the first MAX_ASSOC_RULES unique rules, sorts them, |
---|
143 | * and writes a report to the given file. |
---|
144 | */ |
---|
145 | |
---|
146 | void dump_associated_rules( file, ds ) |
---|
147 | FILE *file; |
---|
148 | int ds; |
---|
149 | { |
---|
150 | register int i, j; |
---|
151 | register int num_associated_rules = 0; |
---|
152 | int rule_set[MAX_ASSOC_RULES + 1]; |
---|
153 | int *dset = dss[ds]; |
---|
154 | int size = dfasiz[ds]; |
---|
155 | |
---|
156 | for ( i = 1; i <= size; ++i ) |
---|
157 | { |
---|
158 | register int rule_num = rule_linenum[assoc_rule[dset[i]]]; |
---|
159 | |
---|
160 | for ( j = 1; j <= num_associated_rules; ++j ) |
---|
161 | if ( rule_num == rule_set[j] ) |
---|
162 | break; |
---|
163 | |
---|
164 | if ( j > num_associated_rules ) |
---|
165 | { /* new rule */ |
---|
166 | if ( num_associated_rules < MAX_ASSOC_RULES ) |
---|
167 | rule_set[++num_associated_rules] = rule_num; |
---|
168 | } |
---|
169 | } |
---|
170 | |
---|
171 | bubble( rule_set, num_associated_rules ); |
---|
172 | |
---|
173 | fprintf( file, _( " associated rule line numbers:" ) ); |
---|
174 | |
---|
175 | for ( i = 1; i <= num_associated_rules; ++i ) |
---|
176 | { |
---|
177 | if ( i % 8 == 1 ) |
---|
178 | putc( '\n', file ); |
---|
179 | |
---|
180 | fprintf( file, "\t%d", rule_set[i] ); |
---|
181 | } |
---|
182 | |
---|
183 | putc( '\n', file ); |
---|
184 | } |
---|
185 | |
---|
186 | |
---|
187 | /* dump_transitions - list the transitions associated with a DFA state |
---|
188 | * |
---|
189 | * synopsis |
---|
190 | * dump_transitions( FILE *file, int state[numecs] ); |
---|
191 | * |
---|
192 | * Goes through the set of out-transitions and lists them in human-readable |
---|
193 | * form (i.e., not as equivalence classes); also lists jam transitions |
---|
194 | * (i.e., all those which are not out-transitions, plus EOF). The dump |
---|
195 | * is done to the given file. |
---|
196 | */ |
---|
197 | |
---|
198 | void dump_transitions( file, state ) |
---|
199 | FILE *file; |
---|
200 | int state[]; |
---|
201 | { |
---|
202 | register int i, ec; |
---|
203 | int out_char_set[CSIZE]; |
---|
204 | |
---|
205 | for ( i = 0; i < csize; ++i ) |
---|
206 | { |
---|
207 | ec = ABS( ecgroup[i] ); |
---|
208 | out_char_set[i] = state[ec]; |
---|
209 | } |
---|
210 | |
---|
211 | fprintf( file, _( " out-transitions: " ) ); |
---|
212 | |
---|
213 | list_character_set( file, out_char_set ); |
---|
214 | |
---|
215 | /* now invert the members of the set to get the jam transitions */ |
---|
216 | for ( i = 0; i < csize; ++i ) |
---|
217 | out_char_set[i] = ! out_char_set[i]; |
---|
218 | |
---|
219 | fprintf( file, _( "\n jam-transitions: EOF " ) ); |
---|
220 | |
---|
221 | list_character_set( file, out_char_set ); |
---|
222 | |
---|
223 | putc( '\n', file ); |
---|
224 | } |
---|
225 | |
---|
226 | |
---|
227 | /* epsclosure - construct the epsilon closure of a set of ndfa states |
---|
228 | * |
---|
229 | * synopsis |
---|
230 | * int *epsclosure( int t[num_states], int *numstates_addr, |
---|
231 | * int accset[num_rules+1], int *nacc_addr, |
---|
232 | * int *hashval_addr ); |
---|
233 | * |
---|
234 | * NOTES |
---|
235 | * The epsilon closure is the set of all states reachable by an arbitrary |
---|
236 | * number of epsilon transitions, which themselves do not have epsilon |
---|
237 | * transitions going out, unioned with the set of states which have non-null |
---|
238 | * accepting numbers. t is an array of size numstates of nfa state numbers. |
---|
239 | * Upon return, t holds the epsilon closure and *numstates_addr is updated. |
---|
240 | * accset holds a list of the accepting numbers, and the size of accset is |
---|
241 | * given by *nacc_addr. t may be subjected to reallocation if it is not |
---|
242 | * large enough to hold the epsilon closure. |
---|
243 | * |
---|
244 | * hashval is the hash value for the dfa corresponding to the state set. |
---|
245 | */ |
---|
246 | |
---|
247 | int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) |
---|
248 | int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; |
---|
249 | { |
---|
250 | register int stkpos, ns, tsp; |
---|
251 | int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; |
---|
252 | int stkend, nstate; |
---|
253 | static int did_stk_init = false, *stk; |
---|
254 | |
---|
255 | #define MARK_STATE(state) \ |
---|
256 | trans1[state] = trans1[state] - MARKER_DIFFERENCE; |
---|
257 | |
---|
258 | #define IS_MARKED(state) (trans1[state] < 0) |
---|
259 | |
---|
260 | #define UNMARK_STATE(state) \ |
---|
261 | trans1[state] = trans1[state] + MARKER_DIFFERENCE; |
---|
262 | |
---|
263 | #define CHECK_ACCEPT(state) \ |
---|
264 | { \ |
---|
265 | nfaccnum = accptnum[state]; \ |
---|
266 | if ( nfaccnum != NIL ) \ |
---|
267 | accset[++nacc] = nfaccnum; \ |
---|
268 | } |
---|
269 | |
---|
270 | #define DO_REALLOCATION \ |
---|
271 | { \ |
---|
272 | current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ |
---|
273 | ++num_reallocs; \ |
---|
274 | t = reallocate_integer_array( t, current_max_dfa_size ); \ |
---|
275 | stk = reallocate_integer_array( stk, current_max_dfa_size ); \ |
---|
276 | } \ |
---|
277 | |
---|
278 | #define PUT_ON_STACK(state) \ |
---|
279 | { \ |
---|
280 | if ( ++stkend >= current_max_dfa_size ) \ |
---|
281 | DO_REALLOCATION \ |
---|
282 | stk[stkend] = state; \ |
---|
283 | MARK_STATE(state) \ |
---|
284 | } |
---|
285 | |
---|
286 | #define ADD_STATE(state) \ |
---|
287 | { \ |
---|
288 | if ( ++numstates >= current_max_dfa_size ) \ |
---|
289 | DO_REALLOCATION \ |
---|
290 | t[numstates] = state; \ |
---|
291 | hashval += state; \ |
---|
292 | } |
---|
293 | |
---|
294 | #define STACK_STATE(state) \ |
---|
295 | { \ |
---|
296 | PUT_ON_STACK(state) \ |
---|
297 | CHECK_ACCEPT(state) \ |
---|
298 | if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ |
---|
299 | ADD_STATE(state) \ |
---|
300 | } |
---|
301 | |
---|
302 | |
---|
303 | if ( ! did_stk_init ) |
---|
304 | { |
---|
305 | stk = allocate_integer_array( current_max_dfa_size ); |
---|
306 | did_stk_init = true; |
---|
307 | } |
---|
308 | |
---|
309 | nacc = stkend = hashval = 0; |
---|
310 | |
---|
311 | for ( nstate = 1; nstate <= numstates; ++nstate ) |
---|
312 | { |
---|
313 | ns = t[nstate]; |
---|
314 | |
---|
315 | /* The state could be marked if we've already pushed it onto |
---|
316 | * the stack. |
---|
317 | */ |
---|
318 | if ( ! IS_MARKED(ns) ) |
---|
319 | { |
---|
320 | PUT_ON_STACK(ns) |
---|
321 | CHECK_ACCEPT(ns) |
---|
322 | hashval += ns; |
---|
323 | } |
---|
324 | } |
---|
325 | |
---|
326 | for ( stkpos = 1; stkpos <= stkend; ++stkpos ) |
---|
327 | { |
---|
328 | ns = stk[stkpos]; |
---|
329 | transsym = transchar[ns]; |
---|
330 | |
---|
331 | if ( transsym == SYM_EPSILON ) |
---|
332 | { |
---|
333 | tsp = trans1[ns] + MARKER_DIFFERENCE; |
---|
334 | |
---|
335 | if ( tsp != NO_TRANSITION ) |
---|
336 | { |
---|
337 | if ( ! IS_MARKED(tsp) ) |
---|
338 | STACK_STATE(tsp) |
---|
339 | |
---|
340 | tsp = trans2[ns]; |
---|
341 | |
---|
342 | if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) |
---|
343 | STACK_STATE(tsp) |
---|
344 | } |
---|
345 | } |
---|
346 | } |
---|
347 | |
---|
348 | /* Clear out "visit" markers. */ |
---|
349 | |
---|
350 | for ( stkpos = 1; stkpos <= stkend; ++stkpos ) |
---|
351 | { |
---|
352 | if ( IS_MARKED(stk[stkpos]) ) |
---|
353 | UNMARK_STATE(stk[stkpos]) |
---|
354 | else |
---|
355 | flexfatal( |
---|
356 | _( "consistency check failed in epsclosure()" ) ); |
---|
357 | } |
---|
358 | |
---|
359 | *ns_addr = numstates; |
---|
360 | *hv_addr = hashval; |
---|
361 | *nacc_addr = nacc; |
---|
362 | |
---|
363 | return t; |
---|
364 | } |
---|
365 | |
---|
366 | |
---|
367 | /* increase_max_dfas - increase the maximum number of DFAs */ |
---|
368 | |
---|
369 | void increase_max_dfas() |
---|
370 | { |
---|
371 | current_max_dfas += MAX_DFAS_INCREMENT; |
---|
372 | |
---|
373 | ++num_reallocs; |
---|
374 | |
---|
375 | base = reallocate_integer_array( base, current_max_dfas ); |
---|
376 | def = reallocate_integer_array( def, current_max_dfas ); |
---|
377 | dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); |
---|
378 | accsiz = reallocate_integer_array( accsiz, current_max_dfas ); |
---|
379 | dhash = reallocate_integer_array( dhash, current_max_dfas ); |
---|
380 | dss = reallocate_int_ptr_array( dss, current_max_dfas ); |
---|
381 | dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); |
---|
382 | |
---|
383 | if ( nultrans ) |
---|
384 | nultrans = |
---|
385 | reallocate_integer_array( nultrans, current_max_dfas ); |
---|
386 | } |
---|
387 | |
---|
388 | |
---|
389 | /* ntod - convert an ndfa to a dfa |
---|
390 | * |
---|
391 | * Creates the dfa corresponding to the ndfa we've constructed. The |
---|
392 | * dfa starts out in state #1. |
---|
393 | */ |
---|
394 | |
---|
395 | void ntod() |
---|
396 | { |
---|
397 | int *accset, ds, nacc, newds; |
---|
398 | int sym, hashval, numstates, dsize; |
---|
399 | int num_full_table_rows; /* used only for -f */ |
---|
400 | int *nset, *dset; |
---|
401 | int targptr, totaltrans, i, comstate, comfreq, targ; |
---|
402 | int symlist[CSIZE + 1]; |
---|
403 | int num_start_states; |
---|
404 | int todo_head, todo_next; |
---|
405 | |
---|
406 | /* Note that the following are indexed by *equivalence classes* |
---|
407 | * and not by characters. Since equivalence classes are indexed |
---|
408 | * beginning with 1, even if the scanner accepts NUL's, this |
---|
409 | * means that (since every character is potentially in its own |
---|
410 | * equivalence class) these arrays must have room for indices |
---|
411 | * from 1 to CSIZE, so their size must be CSIZE + 1. |
---|
412 | */ |
---|
413 | int duplist[CSIZE + 1], state[CSIZE + 1]; |
---|
414 | int targfreq[CSIZE + 1], targstate[CSIZE + 1]; |
---|
415 | |
---|
416 | accset = allocate_integer_array( num_rules + 1 ); |
---|
417 | nset = allocate_integer_array( current_max_dfa_size ); |
---|
418 | |
---|
419 | /* The "todo" queue is represented by the head, which is the DFA |
---|
420 | * state currently being processed, and the "next", which is the |
---|
421 | * next DFA state number available (not in use). We depend on the |
---|
422 | * fact that snstods() returns DFA's \in increasing order/, and thus |
---|
423 | * need only know the bounds of the dfas to be processed. |
---|
424 | */ |
---|
425 | todo_head = todo_next = 0; |
---|
426 | |
---|
427 | for ( i = 0; i <= csize; ++i ) |
---|
428 | { |
---|
429 | duplist[i] = NIL; |
---|
430 | symlist[i] = false; |
---|
431 | } |
---|
432 | |
---|
433 | for ( i = 0; i <= num_rules; ++i ) |
---|
434 | accset[i] = NIL; |
---|
435 | |
---|
436 | if ( trace ) |
---|
437 | { |
---|
438 | dumpnfa( scset[1] ); |
---|
439 | fputs( _( "\n\nDFA Dump:\n\n" ), stderr ); |
---|
440 | } |
---|
441 | |
---|
442 | inittbl(); |
---|
443 | |
---|
444 | /* Check to see whether we should build a separate table for |
---|
445 | * transitions on NUL characters. We don't do this for full-speed |
---|
446 | * (-F) scanners, since for them we don't have a simple state |
---|
447 | * number lying around with which to index the table. We also |
---|
448 | * don't bother doing it for scanners unless (1) NUL is in its own |
---|
449 | * equivalence class (indicated by a positive value of |
---|
450 | * ecgroup[NUL]), (2) NUL's equivalence class is the last |
---|
451 | * equivalence class, and (3) the number of equivalence classes is |
---|
452 | * the same as the number of characters. This latter case comes |
---|
453 | * about when useecs is false or when it's true but every character |
---|
454 | * still manages to land in its own class (unlikely, but it's |
---|
455 | * cheap to check for). If all these things are true then the |
---|
456 | * character code needed to represent NUL's equivalence class for |
---|
457 | * indexing the tables is going to take one more bit than the |
---|
458 | * number of characters, and therefore we won't be assured of |
---|
459 | * being able to fit it into a YY_CHAR variable. This rules out |
---|
460 | * storing the transitions in a compressed table, since the code |
---|
461 | * for interpreting them uses a YY_CHAR variable (perhaps it |
---|
462 | * should just use an integer, though; this is worth pondering ... |
---|
463 | * ###). |
---|
464 | * |
---|
465 | * Finally, for full tables, we want the number of entries in the |
---|
466 | * table to be a power of two so the array references go fast (it |
---|
467 | * will just take a shift to compute the major index). If |
---|
468 | * encoding NUL's transitions in the table will spoil this, we |
---|
469 | * give it its own table (note that this will be the case if we're |
---|
470 | * not using equivalence classes). |
---|
471 | */ |
---|
472 | |
---|
473 | /* Note that the test for ecgroup[0] == numecs below accomplishes |
---|
474 | * both (1) and (2) above |
---|
475 | */ |
---|
476 | if ( ! fullspd && ecgroup[0] == numecs ) |
---|
477 | { |
---|
478 | /* NUL is alone in its equivalence class, which is the |
---|
479 | * last one. |
---|
480 | */ |
---|
481 | int use_NUL_table = (numecs == csize); |
---|
482 | |
---|
483 | if ( fulltbl && ! use_NUL_table ) |
---|
484 | { |
---|
485 | /* We still may want to use the table if numecs |
---|
486 | * is a power of 2. |
---|
487 | */ |
---|
488 | int power_of_two; |
---|
489 | |
---|
490 | for ( power_of_two = 1; power_of_two <= csize; |
---|
491 | power_of_two *= 2 ) |
---|
492 | if ( numecs == power_of_two ) |
---|
493 | { |
---|
494 | use_NUL_table = true; |
---|
495 | break; |
---|
496 | } |
---|
497 | } |
---|
498 | |
---|
499 | if ( use_NUL_table ) |
---|
500 | nultrans = allocate_integer_array( current_max_dfas ); |
---|
501 | |
---|
502 | /* From now on, nultrans != nil indicates that we're |
---|
503 | * saving null transitions for later, separate encoding. |
---|
504 | */ |
---|
505 | } |
---|
506 | |
---|
507 | |
---|
508 | if ( fullspd ) |
---|
509 | { |
---|
510 | for ( i = 0; i <= numecs; ++i ) |
---|
511 | state[i] = 0; |
---|
512 | |
---|
513 | place_state( state, 0, 0 ); |
---|
514 | dfaacc[0].dfaacc_state = 0; |
---|
515 | } |
---|
516 | |
---|
517 | else if ( fulltbl ) |
---|
518 | { |
---|
519 | if ( nultrans ) |
---|
520 | /* We won't be including NUL's transitions in the |
---|
521 | * table, so build it for entries from 0 .. numecs - 1. |
---|
522 | */ |
---|
523 | num_full_table_rows = numecs; |
---|
524 | |
---|
525 | else |
---|
526 | /* Take into account the fact that we'll be including |
---|
527 | * the NUL entries in the transition table. Build it |
---|
528 | * from 0 .. numecs. |
---|
529 | */ |
---|
530 | num_full_table_rows = numecs + 1; |
---|
531 | |
---|
532 | /* Unless -Ca, declare it "short" because it's a real |
---|
533 | * long-shot that that won't be large enough. |
---|
534 | */ |
---|
535 | out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n", |
---|
536 | /* '}' so vi doesn't get too confused */ |
---|
537 | long_align ? "long" : "short", num_full_table_rows ); |
---|
538 | |
---|
539 | outn( " {" ); |
---|
540 | |
---|
541 | /* Generate 0 entries for state #0. */ |
---|
542 | for ( i = 0; i < num_full_table_rows; ++i ) |
---|
543 | mk2data( 0 ); |
---|
544 | |
---|
545 | dataflush(); |
---|
546 | outn( " },\n" ); |
---|
547 | } |
---|
548 | |
---|
549 | /* Create the first states. */ |
---|
550 | |
---|
551 | num_start_states = lastsc * 2; |
---|
552 | |
---|
553 | for ( i = 1; i <= num_start_states; ++i ) |
---|
554 | { |
---|
555 | numstates = 1; |
---|
556 | |
---|
557 | /* For each start condition, make one state for the case when |
---|
558 | * we're at the beginning of the line (the '^' operator) and |
---|
559 | * one for the case when we're not. |
---|
560 | */ |
---|
561 | if ( i % 2 == 1 ) |
---|
562 | nset[numstates] = scset[(i / 2) + 1]; |
---|
563 | else |
---|
564 | nset[numstates] = |
---|
565 | mkbranch( scbol[i / 2], scset[i / 2] ); |
---|
566 | |
---|
567 | nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); |
---|
568 | |
---|
569 | if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) |
---|
570 | { |
---|
571 | numas += nacc; |
---|
572 | totnst += numstates; |
---|
573 | ++todo_next; |
---|
574 | |
---|
575 | if ( variable_trailing_context_rules && nacc > 0 ) |
---|
576 | check_trailing_context( nset, numstates, |
---|
577 | accset, nacc ); |
---|
578 | } |
---|
579 | } |
---|
580 | |
---|
581 | if ( ! fullspd ) |
---|
582 | { |
---|
583 | if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) |
---|
584 | flexfatal( |
---|
585 | _( "could not create unique end-of-buffer state" ) ); |
---|
586 | |
---|
587 | ++numas; |
---|
588 | ++num_start_states; |
---|
589 | ++todo_next; |
---|
590 | } |
---|
591 | |
---|
592 | while ( todo_head < todo_next ) |
---|
593 | { |
---|
594 | targptr = 0; |
---|
595 | totaltrans = 0; |
---|
596 | |
---|
597 | for ( i = 1; i <= numecs; ++i ) |
---|
598 | state[i] = 0; |
---|
599 | |
---|
600 | ds = ++todo_head; |
---|
601 | |
---|
602 | dset = dss[ds]; |
---|
603 | dsize = dfasiz[ds]; |
---|
604 | |
---|
605 | if ( trace ) |
---|
606 | fprintf( stderr, _( "state # %d:\n" ), ds ); |
---|
607 | |
---|
608 | sympartition( dset, dsize, symlist, duplist ); |
---|
609 | |
---|
610 | for ( sym = 1; sym <= numecs; ++sym ) |
---|
611 | { |
---|
612 | if ( symlist[sym] ) |
---|
613 | { |
---|
614 | symlist[sym] = 0; |
---|
615 | |
---|
616 | if ( duplist[sym] == NIL ) |
---|
617 | { |
---|
618 | /* Symbol has unique out-transitions. */ |
---|
619 | numstates = symfollowset( dset, dsize, |
---|
620 | sym, nset ); |
---|
621 | nset = epsclosure( nset, &numstates, |
---|
622 | accset, &nacc, &hashval ); |
---|
623 | |
---|
624 | if ( snstods( nset, numstates, accset, |
---|
625 | nacc, hashval, &newds ) ) |
---|
626 | { |
---|
627 | totnst = totnst + numstates; |
---|
628 | ++todo_next; |
---|
629 | numas += nacc; |
---|
630 | |
---|
631 | if ( |
---|
632 | variable_trailing_context_rules && |
---|
633 | nacc > 0 ) |
---|
634 | check_trailing_context( |
---|
635 | nset, numstates, |
---|
636 | accset, nacc ); |
---|
637 | } |
---|
638 | |
---|
639 | state[sym] = newds; |
---|
640 | |
---|
641 | if ( trace ) |
---|
642 | fprintf( stderr, "\t%d\t%d\n", |
---|
643 | sym, newds ); |
---|
644 | |
---|
645 | targfreq[++targptr] = 1; |
---|
646 | targstate[targptr] = newds; |
---|
647 | ++numuniq; |
---|
648 | } |
---|
649 | |
---|
650 | else |
---|
651 | { |
---|
652 | /* sym's equivalence class has the same |
---|
653 | * transitions as duplist(sym)'s |
---|
654 | * equivalence class. |
---|
655 | */ |
---|
656 | targ = state[duplist[sym]]; |
---|
657 | state[sym] = targ; |
---|
658 | |
---|
659 | if ( trace ) |
---|
660 | fprintf( stderr, "\t%d\t%d\n", |
---|
661 | sym, targ ); |
---|
662 | |
---|
663 | /* Update frequency count for |
---|
664 | * destination state. |
---|
665 | */ |
---|
666 | |
---|
667 | i = 0; |
---|
668 | while ( targstate[++i] != targ ) |
---|
669 | ; |
---|
670 | |
---|
671 | ++targfreq[i]; |
---|
672 | ++numdup; |
---|
673 | } |
---|
674 | |
---|
675 | ++totaltrans; |
---|
676 | duplist[sym] = NIL; |
---|
677 | } |
---|
678 | } |
---|
679 | |
---|
680 | if ( caseins && ! useecs ) |
---|
681 | { |
---|
682 | register int j; |
---|
683 | |
---|
684 | for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) |
---|
685 | { |
---|
686 | if ( state[i] == 0 && state[j] != 0 ) |
---|
687 | /* We're adding a transition. */ |
---|
688 | ++totaltrans; |
---|
689 | |
---|
690 | else if ( state[i] != 0 && state[j] == 0 ) |
---|
691 | /* We're taking away a transition. */ |
---|
692 | --totaltrans; |
---|
693 | |
---|
694 | state[i] = state[j]; |
---|
695 | } |
---|
696 | } |
---|
697 | |
---|
698 | numsnpairs += totaltrans; |
---|
699 | |
---|
700 | if ( ds > num_start_states ) |
---|
701 | check_for_backing_up( ds, state ); |
---|
702 | |
---|
703 | if ( nultrans ) |
---|
704 | { |
---|
705 | nultrans[ds] = state[NUL_ec]; |
---|
706 | state[NUL_ec] = 0; /* remove transition */ |
---|
707 | } |
---|
708 | |
---|
709 | if ( fulltbl ) |
---|
710 | { |
---|
711 | outn( " {" ); |
---|
712 | |
---|
713 | /* Supply array's 0-element. */ |
---|
714 | if ( ds == end_of_buffer_state ) |
---|
715 | mk2data( -end_of_buffer_state ); |
---|
716 | else |
---|
717 | mk2data( end_of_buffer_state ); |
---|
718 | |
---|
719 | for ( i = 1; i < num_full_table_rows; ++i ) |
---|
720 | /* Jams are marked by negative of state |
---|
721 | * number. |
---|
722 | */ |
---|
723 | mk2data( state[i] ? state[i] : -ds ); |
---|
724 | |
---|
725 | dataflush(); |
---|
726 | outn( " },\n" ); |
---|
727 | } |
---|
728 | |
---|
729 | else if ( fullspd ) |
---|
730 | place_state( state, ds, totaltrans ); |
---|
731 | |
---|
732 | else if ( ds == end_of_buffer_state ) |
---|
733 | /* Special case this state to make sure it does what |
---|
734 | * it's supposed to, i.e., jam on end-of-buffer. |
---|
735 | */ |
---|
736 | stack1( ds, 0, 0, JAMSTATE ); |
---|
737 | |
---|
738 | else /* normal, compressed state */ |
---|
739 | { |
---|
740 | /* Determine which destination state is the most |
---|
741 | * common, and how many transitions to it there are. |
---|
742 | */ |
---|
743 | |
---|
744 | comfreq = 0; |
---|
745 | comstate = 0; |
---|
746 | |
---|
747 | for ( i = 1; i <= targptr; ++i ) |
---|
748 | if ( targfreq[i] > comfreq ) |
---|
749 | { |
---|
750 | comfreq = targfreq[i]; |
---|
751 | comstate = targstate[i]; |
---|
752 | } |
---|
753 | |
---|
754 | bldtbl( state, ds, totaltrans, comstate, comfreq ); |
---|
755 | } |
---|
756 | } |
---|
757 | |
---|
758 | if ( fulltbl ) |
---|
759 | dataend(); |
---|
760 | |
---|
761 | else if ( ! fullspd ) |
---|
762 | { |
---|
763 | cmptmps(); /* create compressed template entries */ |
---|
764 | |
---|
765 | /* Create tables for all the states with only one |
---|
766 | * out-transition. |
---|
767 | */ |
---|
768 | while ( onesp > 0 ) |
---|
769 | { |
---|
770 | mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], |
---|
771 | onedef[onesp] ); |
---|
772 | --onesp; |
---|
773 | } |
---|
774 | |
---|
775 | mkdeftbl(); |
---|
776 | } |
---|
777 | |
---|
778 | flex_free( (void *) accset ); |
---|
779 | flex_free( (void *) nset ); |
---|
780 | } |
---|
781 | |
---|
782 | |
---|
783 | /* snstods - converts a set of ndfa states into a dfa state |
---|
784 | * |
---|
785 | * synopsis |
---|
786 | * is_new_state = snstods( int sns[numstates], int numstates, |
---|
787 | * int accset[num_rules+1], int nacc, |
---|
788 | * int hashval, int *newds_addr ); |
---|
789 | * |
---|
790 | * On return, the dfa state number is in newds. |
---|
791 | */ |
---|
792 | |
---|
793 | int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) |
---|
794 | int sns[], numstates, accset[], nacc, hashval, *newds_addr; |
---|
795 | { |
---|
796 | int didsort = 0; |
---|
797 | register int i, j; |
---|
798 | int newds, *oldsns; |
---|
799 | |
---|
800 | for ( i = 1; i <= lastdfa; ++i ) |
---|
801 | if ( hashval == dhash[i] ) |
---|
802 | { |
---|
803 | if ( numstates == dfasiz[i] ) |
---|
804 | { |
---|
805 | oldsns = dss[i]; |
---|
806 | |
---|
807 | if ( ! didsort ) |
---|
808 | { |
---|
809 | /* We sort the states in sns so we |
---|
810 | * can compare it to oldsns quickly. |
---|
811 | * We use bubble because there probably |
---|
812 | * aren't very many states. |
---|
813 | */ |
---|
814 | bubble( sns, numstates ); |
---|
815 | didsort = 1; |
---|
816 | } |
---|
817 | |
---|
818 | for ( j = 1; j <= numstates; ++j ) |
---|
819 | if ( sns[j] != oldsns[j] ) |
---|
820 | break; |
---|
821 | |
---|
822 | if ( j > numstates ) |
---|
823 | { |
---|
824 | ++dfaeql; |
---|
825 | *newds_addr = i; |
---|
826 | return 0; |
---|
827 | } |
---|
828 | |
---|
829 | ++hshcol; |
---|
830 | } |
---|
831 | |
---|
832 | else |
---|
833 | ++hshsave; |
---|
834 | } |
---|
835 | |
---|
836 | /* Make a new dfa. */ |
---|
837 | |
---|
838 | if ( ++lastdfa >= current_max_dfas ) |
---|
839 | increase_max_dfas(); |
---|
840 | |
---|
841 | newds = lastdfa; |
---|
842 | |
---|
843 | dss[newds] = allocate_integer_array( numstates + 1 ); |
---|
844 | |
---|
845 | /* If we haven't already sorted the states in sns, we do so now, |
---|
846 | * so that future comparisons with it can be made quickly. |
---|
847 | */ |
---|
848 | |
---|
849 | if ( ! didsort ) |
---|
850 | bubble( sns, numstates ); |
---|
851 | |
---|
852 | for ( i = 1; i <= numstates; ++i ) |
---|
853 | dss[newds][i] = sns[i]; |
---|
854 | |
---|
855 | dfasiz[newds] = numstates; |
---|
856 | dhash[newds] = hashval; |
---|
857 | |
---|
858 | if ( nacc == 0 ) |
---|
859 | { |
---|
860 | if ( reject ) |
---|
861 | dfaacc[newds].dfaacc_set = (int *) 0; |
---|
862 | else |
---|
863 | dfaacc[newds].dfaacc_state = 0; |
---|
864 | |
---|
865 | accsiz[newds] = 0; |
---|
866 | } |
---|
867 | |
---|
868 | else if ( reject ) |
---|
869 | { |
---|
870 | /* We sort the accepting set in increasing order so the |
---|
871 | * disambiguating rule that the first rule listed is considered |
---|
872 | * match in the event of ties will work. We use a bubble |
---|
873 | * sort since the list is probably quite small. |
---|
874 | */ |
---|
875 | |
---|
876 | bubble( accset, nacc ); |
---|
877 | |
---|
878 | dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); |
---|
879 | |
---|
880 | /* Save the accepting set for later */ |
---|
881 | for ( i = 1; i <= nacc; ++i ) |
---|
882 | { |
---|
883 | dfaacc[newds].dfaacc_set[i] = accset[i]; |
---|
884 | |
---|
885 | if ( accset[i] <= num_rules ) |
---|
886 | /* Who knows, perhaps a REJECT can yield |
---|
887 | * this rule. |
---|
888 | */ |
---|
889 | rule_useful[accset[i]] = true; |
---|
890 | } |
---|
891 | |
---|
892 | accsiz[newds] = nacc; |
---|
893 | } |
---|
894 | |
---|
895 | else |
---|
896 | { |
---|
897 | /* Find lowest numbered rule so the disambiguating rule |
---|
898 | * will work. |
---|
899 | */ |
---|
900 | j = num_rules + 1; |
---|
901 | |
---|
902 | for ( i = 1; i <= nacc; ++i ) |
---|
903 | if ( accset[i] < j ) |
---|
904 | j = accset[i]; |
---|
905 | |
---|
906 | dfaacc[newds].dfaacc_state = j; |
---|
907 | |
---|
908 | if ( j <= num_rules ) |
---|
909 | rule_useful[j] = true; |
---|
910 | } |
---|
911 | |
---|
912 | *newds_addr = newds; |
---|
913 | |
---|
914 | return 1; |
---|
915 | } |
---|
916 | |
---|
917 | |
---|
918 | /* symfollowset - follow the symbol transitions one step |
---|
919 | * |
---|
920 | * synopsis |
---|
921 | * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, |
---|
922 | * int transsym, int nset[current_max_dfa_size] ); |
---|
923 | */ |
---|
924 | |
---|
925 | int symfollowset( ds, dsize, transsym, nset ) |
---|
926 | int ds[], dsize, transsym, nset[]; |
---|
927 | { |
---|
928 | int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; |
---|
929 | |
---|
930 | numstates = 0; |
---|
931 | |
---|
932 | for ( i = 1; i <= dsize; ++i ) |
---|
933 | { /* for each nfa state ns in the state set of ds */ |
---|
934 | ns = ds[i]; |
---|
935 | sym = transchar[ns]; |
---|
936 | tsp = trans1[ns]; |
---|
937 | |
---|
938 | if ( sym < 0 ) |
---|
939 | { /* it's a character class */ |
---|
940 | sym = -sym; |
---|
941 | ccllist = cclmap[sym]; |
---|
942 | lenccl = ccllen[sym]; |
---|
943 | |
---|
944 | if ( cclng[sym] ) |
---|
945 | { |
---|
946 | for ( j = 0; j < lenccl; ++j ) |
---|
947 | { |
---|
948 | /* Loop through negated character |
---|
949 | * class. |
---|
950 | */ |
---|
951 | ch = ccltbl[ccllist + j]; |
---|
952 | |
---|
953 | if ( ch == 0 ) |
---|
954 | ch = NUL_ec; |
---|
955 | |
---|
956 | if ( ch > transsym ) |
---|
957 | /* Transsym isn't in negated |
---|
958 | * ccl. |
---|
959 | */ |
---|
960 | break; |
---|
961 | |
---|
962 | else if ( ch == transsym ) |
---|
963 | /* next 2 */ goto bottom; |
---|
964 | } |
---|
965 | |
---|
966 | /* Didn't find transsym in ccl. */ |
---|
967 | nset[++numstates] = tsp; |
---|
968 | } |
---|
969 | |
---|
970 | else |
---|
971 | for ( j = 0; j < lenccl; ++j ) |
---|
972 | { |
---|
973 | ch = ccltbl[ccllist + j]; |
---|
974 | |
---|
975 | if ( ch == 0 ) |
---|
976 | ch = NUL_ec; |
---|
977 | |
---|
978 | if ( ch > transsym ) |
---|
979 | break; |
---|
980 | else if ( ch == transsym ) |
---|
981 | { |
---|
982 | nset[++numstates] = tsp; |
---|
983 | break; |
---|
984 | } |
---|
985 | } |
---|
986 | } |
---|
987 | |
---|
988 | else if ( sym >= 'A' && sym <= 'Z' && caseins ) |
---|
989 | flexfatal( |
---|
990 | _( "consistency check failed in symfollowset" ) ); |
---|
991 | |
---|
992 | else if ( sym == SYM_EPSILON ) |
---|
993 | { /* do nothing */ |
---|
994 | } |
---|
995 | |
---|
996 | else if ( ABS( ecgroup[sym] ) == transsym ) |
---|
997 | nset[++numstates] = tsp; |
---|
998 | |
---|
999 | bottom: ; |
---|
1000 | } |
---|
1001 | |
---|
1002 | return numstates; |
---|
1003 | } |
---|
1004 | |
---|
1005 | |
---|
1006 | /* sympartition - partition characters with same out-transitions |
---|
1007 | * |
---|
1008 | * synopsis |
---|
1009 | * sympartition( int ds[current_max_dfa_size], int numstates, |
---|
1010 | * int symlist[numecs], int duplist[numecs] ); |
---|
1011 | */ |
---|
1012 | |
---|
1013 | void sympartition( ds, numstates, symlist, duplist ) |
---|
1014 | int ds[], numstates; |
---|
1015 | int symlist[], duplist[]; |
---|
1016 | { |
---|
1017 | int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; |
---|
1018 | |
---|
1019 | /* Partitioning is done by creating equivalence classes for those |
---|
1020 | * characters which have out-transitions from the given state. Thus |
---|
1021 | * we are really creating equivalence classes of equivalence classes. |
---|
1022 | */ |
---|
1023 | |
---|
1024 | for ( i = 1; i <= numecs; ++i ) |
---|
1025 | { /* initialize equivalence class list */ |
---|
1026 | duplist[i] = i - 1; |
---|
1027 | dupfwd[i] = i + 1; |
---|
1028 | } |
---|
1029 | |
---|
1030 | duplist[1] = NIL; |
---|
1031 | dupfwd[numecs] = NIL; |
---|
1032 | |
---|
1033 | for ( i = 1; i <= numstates; ++i ) |
---|
1034 | { |
---|
1035 | ns = ds[i]; |
---|
1036 | tch = transchar[ns]; |
---|
1037 | |
---|
1038 | if ( tch != SYM_EPSILON ) |
---|
1039 | { |
---|
1040 | if ( tch < -lastccl || tch >= csize ) |
---|
1041 | { |
---|
1042 | flexfatal( |
---|
1043 | _( "bad transition character detected in sympartition()" ) ); |
---|
1044 | } |
---|
1045 | |
---|
1046 | if ( tch >= 0 ) |
---|
1047 | { /* character transition */ |
---|
1048 | int ec = ecgroup[tch]; |
---|
1049 | |
---|
1050 | mkechar( ec, dupfwd, duplist ); |
---|
1051 | symlist[ec] = 1; |
---|
1052 | } |
---|
1053 | |
---|
1054 | else |
---|
1055 | { /* character class */ |
---|
1056 | tch = -tch; |
---|
1057 | |
---|
1058 | lenccl = ccllen[tch]; |
---|
1059 | cclp = cclmap[tch]; |
---|
1060 | mkeccl( ccltbl + cclp, lenccl, dupfwd, |
---|
1061 | duplist, numecs, NUL_ec ); |
---|
1062 | |
---|
1063 | if ( cclng[tch] ) |
---|
1064 | { |
---|
1065 | j = 0; |
---|
1066 | |
---|
1067 | for ( k = 0; k < lenccl; ++k ) |
---|
1068 | { |
---|
1069 | ich = ccltbl[cclp + k]; |
---|
1070 | |
---|
1071 | if ( ich == 0 ) |
---|
1072 | ich = NUL_ec; |
---|
1073 | |
---|
1074 | for ( ++j; j < ich; ++j ) |
---|
1075 | symlist[j] = 1; |
---|
1076 | } |
---|
1077 | |
---|
1078 | for ( ++j; j <= numecs; ++j ) |
---|
1079 | symlist[j] = 1; |
---|
1080 | } |
---|
1081 | |
---|
1082 | else |
---|
1083 | for ( k = 0; k < lenccl; ++k ) |
---|
1084 | { |
---|
1085 | ich = ccltbl[cclp + k]; |
---|
1086 | |
---|
1087 | if ( ich == 0 ) |
---|
1088 | ich = NUL_ec; |
---|
1089 | |
---|
1090 | symlist[ich] = 1; |
---|
1091 | } |
---|
1092 | } |
---|
1093 | } |
---|
1094 | } |
---|
1095 | } |
---|