source: trunk/third/rx/rx/rxsuper.c @ 10430

Revision 10430, 37.0 KB checked in by ghudson, 27 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r10429, which included commits to RCS files with non-trunk default branches.
Line 
1
2
3/*      Copyright (C) 1995, 1996 Tom Lord
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Library General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this software; see the file COPYING.  If not, write to
17 * the Free Software Foundation, 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
19 */
20
21
22
23#include "rxall.h"
24#include "rxsuper.h"
25
26
27/* The functions in the next several pages define the lazy-NFA-conversion used
28 * by matchers.  The input to this construction is an NFA such as
29 * is built by compactify_nfa (rx.c).  The output is the superNFA.
30 */
31
32/* Match engines can use arbitrary values for opcodes.  So, the parse tree
33 * is built using instructions names (enum rx_opcode), but the superstate
34 * nfa is populated with mystery opcodes (void *).
35 *
36 * For convenience, here is an id table.  The opcodes are == to their inxs
37 *
38 * The lables in re_search_2 would make good values for instructions.
39 */
40
41void * rx_id_instruction_table[rx_num_instructions] =
42{
43  (void *) rx_backtrack_point,
44  (void *) rx_do_side_effects,
45  (void *) rx_cache_miss,
46  (void *) rx_next_char,
47  (void *) rx_backtrack,
48  (void *) rx_error_inx
49};
50
51
52
53
54/* The partially instantiated superstate graph has a transition
55 * table at every node.  There is one entry for every character.
56 * This fills in the transition for a set.
57 */
58#ifdef __STDC__
59static void
60install_transition (struct rx_superstate *super,
61                    struct rx_inx *answer, rx_Bitset trcset)
62#else
63static void
64install_transition (super, answer, trcset)
65     struct rx_superstate *super;
66     struct rx_inx *answer;
67     rx_Bitset trcset;
68#endif
69{
70  struct rx_inx * transitions = super->transitions;
71  int chr;
72  for (chr = 0; chr < 256; )
73    if (!*trcset)
74      {
75        ++trcset;
76        chr += 32;
77      }
78    else
79      {
80        RX_subset sub = *trcset;
81        RX_subset mask = 1;
82        int bound = chr + 32;
83        while (chr < bound)
84          {
85            if (sub & mask)
86              transitions [chr] = *answer;
87            ++chr;
88            mask <<= 1;
89          }
90        ++trcset;
91      }
92}
93
94
95#ifdef __STDC__
96static int
97qlen (struct rx_superstate * q)
98#else
99static int
100qlen (q)
101     struct rx_superstate * q;
102#endif
103{
104  int count = 1;
105  struct rx_superstate * it;
106  if (!q)
107    return 0;
108  for (it = q->next_recyclable; it != q; it = it->next_recyclable)
109    ++count;
110  return count;
111}
112
113#ifdef __STDC__
114static void
115check_cache (struct rx_cache * cache)
116#else
117static void
118check_cache (cache)
119     struct rx_cache * cache;
120#endif
121{
122  struct rx_cache * you_fucked_up = 0;
123  int total = cache->superstates;
124  int semi = cache->semifree_superstates;
125  if (semi != qlen (cache->semifree_superstate))
126    check_cache (you_fucked_up);
127  if ((total - semi) != qlen (cache->lru_superstate))
128    check_cache (you_fucked_up);
129}
130#ifdef __STDC__
131char *
132rx_cache_malloc (struct rx_cache * cache, int size)
133#else
134char *
135rx_cache_malloc (cache, size)
136     struct rx_cache * cache;
137     int size;
138#endif
139{
140  char * answer;
141  answer = (char *)malloc (size);
142  if (answer)
143    cache->bytes_used += size;
144  return answer;
145}
146
147
148#ifdef __STDC__
149void
150rx_cache_free (struct rx_cache * cache, int size, char * mem)
151#else
152void
153rx_cache_free (cache, size, mem)
154     struct rx_cache * cache;
155     int size;
156     char * mem;
157#endif
158{
159  free (mem);
160  cache->bytes_used -= size;
161}
162
163
164
165/* When a superstate is old and neglected, it can enter a
166 * semi-free state.  A semi-free state is slated to die.
167 * Incoming transitions to a semi-free state are re-written
168 * to cause an (interpreted) fault when they are taken.
169 * The fault handler revives the semi-free state, patches
170 * incoming transitions back to normal, and continues.
171 *
172 * The idea is basicly to free in two stages, aborting
173 * between the two if the state turns out to be useful again.
174 * When a free is aborted, the rescued superstate is placed
175 * in the most-favored slot to maximize the time until it
176 * is next semi-freed.
177 *
178 * Overall, this approximates truly freeing superstates in
179 * lru order, but does not involve the cost of updating perfectly
180 * accurate timestamps every time a superstate is touched.
181 */
182
183#ifdef __STDC__
184static void
185semifree_superstate (struct rx_cache * cache)
186#else
187static void
188semifree_superstate (cache)
189     struct rx_cache * cache;
190#endif
191{
192  int disqualified = cache->semifree_superstates;
193  if (disqualified == cache->superstates)
194    return;
195  while (cache->lru_superstate->locks)
196    {
197      cache->lru_superstate = cache->lru_superstate->next_recyclable;
198      ++disqualified;
199      if (disqualified == cache->superstates)
200        return;
201    }
202  {
203    struct rx_superstate * it = cache->lru_superstate;
204    it->next_recyclable->prev_recyclable = it->prev_recyclable;
205    it->prev_recyclable->next_recyclable = it->next_recyclable;
206    cache->lru_superstate = (it == it->next_recyclable
207                             ? 0
208                             : it->next_recyclable);
209    if (!cache->semifree_superstate)
210      {
211        cache->semifree_superstate = it;
212        it->next_recyclable = it;
213        it->prev_recyclable = it;
214      }
215    else
216      {
217        it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
218        it->next_recyclable = cache->semifree_superstate;
219        it->prev_recyclable->next_recyclable = it;
220        it->next_recyclable->prev_recyclable = it;
221      }
222    {
223      struct rx_distinct_future *df;
224      it->is_semifree = 1;
225      ++cache->semifree_superstates;
226      df = it->transition_refs;
227      if (df)
228        {
229          df->prev_same_dest->next_same_dest = 0;
230          for (df = it->transition_refs; df; df = df->next_same_dest)
231            {
232              df->future_frame.inx = cache->instruction_table[rx_cache_miss];
233              df->future_frame.data = 0;
234              df->future_frame.data_2 = (void *) df;
235              /* If there are any NEXT-CHAR instruction frames that
236               * refer to this state, we convert them to CACHE-MISS frames.
237               */
238              if (!df->effects
239                  && (df->edge->options->next_same_super_edge[0]
240                      == df->edge->options))
241                install_transition (df->present, &df->future_frame,
242                                    df->edge->cset);
243            }
244          df = it->transition_refs;
245          df->prev_same_dest->next_same_dest = df;
246        }
247    }
248  }
249}
250
251
252#ifdef __STDC__
253static void
254refresh_semifree_superstate (struct rx_cache * cache,
255                             struct rx_superstate * super)
256#else
257static void
258refresh_semifree_superstate (cache, super)
259     struct rx_cache * cache;
260     struct rx_superstate * super;
261#endif
262{
263  struct rx_distinct_future *df;
264
265  if (super->transition_refs)
266    {
267      super->transition_refs->prev_same_dest->next_same_dest = 0;
268      for (df = super->transition_refs; df; df = df->next_same_dest)
269        {
270          df->future_frame.inx = cache->instruction_table[rx_next_char];
271          df->future_frame.data = (void *) super->transitions;
272          df->future_frame.data_2 = (void *)(super->contents->is_final);
273          /* CACHE-MISS instruction frames that refer to this state,
274           * must be converted to NEXT-CHAR frames.
275           */
276          if (!df->effects
277              && (df->edge->options->next_same_super_edge[0]
278                  == df->edge->options))
279            install_transition (df->present, &df->future_frame,
280                                df->edge->cset);
281        }
282      super->transition_refs->prev_same_dest->next_same_dest
283        = super->transition_refs;
284    }
285  if (cache->semifree_superstate == super)
286    cache->semifree_superstate = (super->prev_recyclable == super
287                                  ? 0
288                                  : super->prev_recyclable);
289  super->next_recyclable->prev_recyclable = super->prev_recyclable;
290  super->prev_recyclable->next_recyclable = super->next_recyclable;
291
292  if (!cache->lru_superstate)
293    (cache->lru_superstate
294     = super->next_recyclable
295     = super->prev_recyclable
296     = super);
297  else
298    {
299      super->next_recyclable = cache->lru_superstate;
300      super->prev_recyclable = cache->lru_superstate->prev_recyclable;
301      super->next_recyclable->prev_recyclable = super;
302      super->prev_recyclable->next_recyclable = super;
303    }
304  super->is_semifree = 0;
305  --cache->semifree_superstates;
306}
307
308#ifdef __STDC__
309void
310rx_refresh_this_superstate (struct rx_cache * cache,
311                            struct rx_superstate * superstate)
312#else
313void
314rx_refresh_this_superstate (cache, superstate)
315     struct rx_cache * cache;
316     struct rx_superstate * superstate;
317#endif
318{
319  if (superstate->is_semifree)
320    refresh_semifree_superstate (cache, superstate);
321  else if (cache->lru_superstate == superstate)
322    cache->lru_superstate = superstate->next_recyclable;
323  else if (superstate != cache->lru_superstate->prev_recyclable)
324    {
325      superstate->next_recyclable->prev_recyclable
326        = superstate->prev_recyclable;
327      superstate->prev_recyclable->next_recyclable
328        = superstate->next_recyclable;
329      superstate->next_recyclable = cache->lru_superstate;
330      superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
331      superstate->next_recyclable->prev_recyclable = superstate;
332      superstate->prev_recyclable->next_recyclable = superstate;
333    }
334}
335
336#ifdef __STDC__
337static void
338release_superset_low (struct rx_cache * cache,
339                     struct rx_superset *set)
340#else
341static void
342release_superset_low (cache, set)
343     struct rx_cache * cache;
344     struct rx_superset *set;
345#endif
346{
347  if (!--set->refs)
348    {
349      if (set->starts_for)
350        set->starts_for->start_set = 0;
351
352      if (set->cdr)
353        release_superset_low (cache, set->cdr);
354
355      rx_hash_free (&set->hash_item, &cache->superset_hash_rules);
356      rx_cache_free (cache,
357                     sizeof (struct rx_superset),
358                     (char *)set);
359    }
360}
361
362#ifdef __STDC__
363void
364rx_release_superset (struct rx *rx,
365                     struct rx_superset *set)
366#else
367void
368rx_release_superset (rx, set)
369     struct rx *rx;
370     struct rx_superset *set;
371#endif
372{
373  release_superset_low (rx->cache, set);
374}
375
376/* This tries to add a new superstate to the superstate freelist.
377 * It might, as a result, free some edge pieces or hash tables.
378 * If nothing can be freed because too many locks are being held, fail.
379 */
380
381#ifdef __STDC__
382static int
383rx_really_free_superstate (struct rx_cache * cache)
384#else
385static int
386rx_really_free_superstate (cache)
387     struct rx_cache * cache;
388#endif
389{
390  int locked_superstates = 0;
391  struct rx_superstate * it;
392
393  if (!cache->superstates)
394    return 0;
395
396  /* scale these */
397  while ((cache->hits + cache->misses) > cache->superstates)
398    {
399      cache->hits >>= 1;
400      cache->misses >>= 1;
401    }
402
403  /* semifree superstates faster than they are freed
404   * so popular states can be rescued.
405   */
406  semifree_superstate (cache);
407  semifree_superstate (cache);
408  semifree_superstate (cache);
409
410#if 0
411  Redundant because semifree_superstate already
412    makes this check;
413
414
415  while (cache->semifree_superstate && cache->semifree_superstate->locks)
416    {
417      refresh_semifree_superstate (cache, cache->semifree_superstate);
418      ++locked_superstates;
419      if (locked_superstates == cache->superstates)
420        return 0;
421    }
422
423
424  if (cache->semifree_superstate)
425    insert the "it =" block which follows this "if 0" code;
426  else
427    {
428      while (cache->lru_superstate->locks)
429        {
430          cache->lru_superstate = cache->lru_superstate->next_recyclable;
431          ++locked_superstates;
432          if (locked_superstates == cache->superstates)
433            return 0;
434        }
435      it = cache->lru_superstate;
436      it->next_recyclable->prev_recyclable = it->prev_recyclable;
437      it->prev_recyclable->next_recyclable = it->next_recyclable;
438      cache->lru_superstate = ((it == it->next_recyclable)
439                                    ? 0
440                                    : it->next_recyclable);
441    }
442#endif
443
444    if (!cache->semifree_superstate)
445      return 0;
446
447    {
448      it = cache->semifree_superstate;
449      it->next_recyclable->prev_recyclable = it->prev_recyclable;
450      it->prev_recyclable->next_recyclable = it->next_recyclable;
451      cache->semifree_superstate = ((it == it->next_recyclable)
452                                    ? 0
453                                    : it->next_recyclable);
454      --cache->semifree_superstates;
455    }
456
457
458  if (it->transition_refs)
459    {
460      struct rx_distinct_future *df;
461      for (df = it->transition_refs,
462           df->prev_same_dest->next_same_dest = 0;
463           df;
464           df = df->next_same_dest)
465        {
466          df->future_frame.inx = cache->instruction_table[rx_cache_miss];
467          df->future_frame.data = 0;
468          df->future_frame.data_2 = (void *) df;
469          df->future = 0;
470        }
471      it->transition_refs->prev_same_dest->next_same_dest =
472        it->transition_refs;
473    }
474  {
475    struct rx_super_edge *tc = it->edges;
476    while (tc)
477      {
478        struct rx_distinct_future * df;
479        struct rx_super_edge *tct = tc->next;
480        df = tc->options;
481        df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
482        while (df)
483          {
484            struct rx_distinct_future *dft = df;
485            df = df->next_same_super_edge[0];
486           
487           
488            if (dft->future && dft->future->transition_refs == dft)
489              {
490                dft->future->transition_refs = dft->next_same_dest;
491                if (dft->future->transition_refs == dft)
492                  dft->future->transition_refs = 0;
493              }
494            dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
495            dft->prev_same_dest->next_same_dest = dft->next_same_dest;
496            rx_cache_free (cache,
497                           sizeof (struct rx_distinct_future),
498                           (char *)dft);
499          }
500        rx_cache_free (cache,
501                       sizeof (struct rx_super_edge),
502                       (char *)tc);
503        tc = tct;
504      }
505  }
506 
507  if (it->contents->superstate == it)
508    it->contents->superstate = 0;
509  release_superset_low (cache, it->contents);
510  rx_cache_free (cache,
511                 (sizeof (struct rx_superstate)
512                  + cache->local_cset_size * sizeof (struct rx_inx)),
513                 (char *)it);
514  --cache->superstates;
515  return 1;
516}
517
518
519#ifdef __STDC__
520static char *
521rx_cache_malloc_or_get (struct rx_cache * cache, int size)
522#else
523static char *
524rx_cache_malloc_or_get (cache, size)
525     struct rx_cache * cache;
526     int size;
527#endif
528{
529  while (   (cache->bytes_used + size > cache->bytes_allowed)
530         && rx_really_free_superstate (cache))
531    ;
532
533  return rx_cache_malloc (cache, size);
534}
535
536
537
538#ifdef __STDC__
539static int
540supersetcmp (void * va, void * vb)
541#else
542static int
543supersetcmp (va, vb)
544     void * va;
545     void * vb;
546#endif
547{
548  struct rx_superset * a = (struct rx_superset *)va;
549  struct rx_superset * b = (struct rx_superset *)vb;
550  return (   (a == b)
551          || (a && b && (a->id == b->id) && (a->car == b->car) && (a->cdr == b->cdr)));
552}
553
554#define rx_abs(A) (((A) > 0) ? (A) : -(A))
555
556
557#ifdef __STDC__
558static struct rx_hash_item *
559superset_allocator (struct rx_hash_rules * rules, void * val)
560#else
561static struct rx_hash_item *
562superset_allocator (rules, val)
563     struct rx_hash_rules * rules;
564     void * val;
565#endif
566{
567  struct rx_cache * cache;
568  struct rx_superset * template;
569  struct rx_superset * newset;
570
571  cache = ((struct rx_cache *)
572           ((char *)rules
573            - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
574  template = (struct rx_superset *)val;
575  newset = ((struct rx_superset *)
576            rx_cache_malloc (cache, sizeof (*template)));
577  if (!newset)
578    return 0;
579  {
580    int cdrfinal;
581    int cdredges;
582
583    cdrfinal = (template->cdr
584                ? template->cdr->is_final
585                : 0);
586    cdredges = (template->cdr
587                ? template->cdr->has_cset_edges
588                : 0);
589   
590    newset->is_final = (rx_abs (template->car->is_final) > rx_abs(cdrfinal)
591                        ? template->car->is_final
592                        : template->cdr->is_final);
593    newset->has_cset_edges = (template->car->has_cset_edges || cdredges);
594  }
595  newset->refs = 0;
596  newset->id = template->id;
597  newset->car = template->car;
598  newset->cdr = template->cdr;
599  rx_protect_superset (rx, template->cdr);
600  newset->superstate = 0;
601  newset->starts_for = 0;
602  newset->hash_item.data = (void *)newset;
603  newset->hash_item.binding = 0;
604  return &newset->hash_item;
605}
606
607#ifdef __STDC__
608static struct rx_hash *
609super_hash_allocator (struct rx_hash_rules * rules)
610#else
611static struct rx_hash *
612super_hash_allocator (rules)
613     struct rx_hash_rules * rules;
614#endif
615{
616  struct rx_cache * cache;
617  cache = ((struct rx_cache *)
618           ((char *)rules
619            - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
620  return ((struct rx_hash *)
621          rx_cache_malloc (cache, sizeof (struct rx_hash)));
622}
623
624
625#ifdef __STDC__
626static void
627super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
628#else
629static void
630super_hash_liberator (hash, rules)
631     struct rx_hash * hash;
632     struct rx_hash_rules * rules;
633#endif
634{
635  struct rx_cache * cache
636    = ((struct rx_cache *)
637       (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
638  rx_cache_free (cache, sizeof (struct rx_hash), (char *)hash);
639}
640
641#ifdef __STDC__
642static void
643superset_hash_item_liberator (struct rx_hash_item * it,
644                              struct rx_hash_rules * rules)
645#else
646static void
647superset_hash_item_liberator (it, rules)
648     struct rx_hash_item * it;
649     struct rx_hash_rules * rules;
650#endif
651{
652}
653
654int rx_cache_bound = 3;
655static int rx_default_cache_got = 0;
656
657static struct rx_cache default_cache =
658{
659  {
660    supersetcmp,
661    super_hash_allocator,
662    super_hash_liberator,
663    superset_allocator,
664    superset_hash_item_liberator,
665  },
666  0,
667  0,
668  0,
669  0,
670  0,
671  0,
672  0,
673  RX_DEFAULT_DFA_CACHE_SIZE,
674  0,
675  256,
676  rx_id_instruction_table,
677  {
678    0,
679    0,
680    0,
681    0,
682    0
683  }
684};
685struct rx_cache * rx_default_cache = &default_cache;
686
687/* This adds an element to a superstate set.  These sets are lists, such
688 * that lists with == elements are ==.  The empty set is returned by
689 * superset_cons (rx, 0, 0) and is NOT equivelent to
690 * (struct rx_superset)0.
691 */
692
693#ifdef __STDC__
694struct rx_superset *
695rx_superset_cons (struct rx * rx,
696                  struct rx_nfa_state *car, struct rx_superset *cdr)
697#else
698struct rx_superset *
699rx_superset_cons (rx, car, cdr)
700     struct rx * rx;
701     struct rx_nfa_state *car;
702     struct rx_superset *cdr;
703#endif
704{
705  struct rx_cache * cache = rx->cache;
706  if (!car && !cdr)
707    {
708      if (!cache->empty_superset)
709        {
710          cache->empty_superset
711            = ((struct rx_superset *)
712               rx_cache_malloc (cache, sizeof (struct rx_superset)));
713          if (!cache->empty_superset)
714            return 0;
715          rx_bzero ((char *)cache->empty_superset, sizeof (struct rx_superset));
716          cache->empty_superset->refs = 1000;
717        }
718      return cache->empty_superset;
719    }
720  {
721    struct rx_superset template;
722    struct rx_hash_item * hit;
723    template.car = car;
724    template.cdr = cdr;
725    template.id = rx->rx_id;
726    rx_protect_superset (rx, template.cdr);
727    hit = rx_hash_store (&cache->superset_table,
728                         (unsigned long)car ^ car->id ^ (unsigned long)cdr,
729                         (void *)&template,
730                         &cache->superset_hash_rules);
731    rx_protect_superset (rx, template.cdr);
732    return (hit
733            ?  (struct rx_superset *)hit->data
734            : 0);
735  }
736}
737
738/* This computes a union of two NFA state sets.  The sets do not have the
739 * same representation though.  One is a RX_SUPERSET structure (part
740 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
741 */
742
743#ifdef __STDC__
744struct rx_superset *
745rx_superstate_eclosure_union (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
746#else
747struct rx_superset *
748rx_superstate_eclosure_union (rx, set, ecl)
749     struct rx * rx;
750     struct rx_superset *set;
751     struct rx_nfa_state_set *ecl;
752#endif
753{
754  if (!ecl)
755    return set;
756
757  if (!set->car)
758    return rx_superset_cons (rx, ecl->car,
759                             rx_superstate_eclosure_union (rx, set, ecl->cdr));
760  if (set->car == ecl->car)
761    return rx_superstate_eclosure_union (rx, set, ecl->cdr);
762
763  {
764    struct rx_superset * tail;
765    struct rx_nfa_state * first;
766
767    if (set->car->id < ecl->car->id)
768      {
769        tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
770        first = set->car;
771      }
772    else
773      {
774        tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
775        first = ecl->car;
776      }
777    if (!tail)
778      return 0;
779    else
780      {
781        struct rx_superset * answer;
782        answer = rx_superset_cons (rx, first, tail);
783        if (!answer)
784          {
785            rx_protect_superset (rx, tail);
786            rx_release_superset (rx, tail);
787            return 0;
788          }
789        else
790          return answer;
791      }
792  }
793}
794
795
796
797
798/*
799 * This makes sure that a list of rx_distinct_futures contains
800 * a future for each possible set of side effects in the eclosure
801 * of a given state.  This is some of the work of filling in a
802 * superstate transition.
803 */
804
805#ifdef __STDC__
806static struct rx_distinct_future *
807include_futures (struct rx *rx,
808                 struct rx_distinct_future *df, struct rx_nfa_state
809                 *state, struct rx_superstate *superstate)
810#else
811static struct rx_distinct_future *
812include_futures (rx, df, state, superstate)
813     struct rx *rx;
814     struct rx_distinct_future *df;
815     struct rx_nfa_state *state;
816     struct rx_superstate *superstate;
817#endif
818{
819  struct rx_possible_future *future;
820  struct rx_cache * cache = rx->cache;
821  for (future = rx_state_possible_futures (rx, state); future; future = future->next)
822    {
823      struct rx_distinct_future *dfp;
824      struct rx_distinct_future *insert_before = 0;
825      if (df)
826        df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
827      for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
828        if (dfp->effects == future->effects)
829          break;
830        else
831          {
832            int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
833            if (order > 0)
834              {
835                insert_before = dfp;
836                dfp = 0;
837                break;
838              }
839          }
840      if (df)
841        df->next_same_super_edge[1]->next_same_super_edge[0] = df;
842      if (!dfp)
843        {
844          dfp
845            = ((struct rx_distinct_future *)
846               rx_cache_malloc (cache,
847                                sizeof (struct rx_distinct_future)));
848          if (!dfp)
849            return 0;
850          if (!df)
851            {
852              df = insert_before = dfp;
853              df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
854            }
855          else if (!insert_before)
856            insert_before = df;
857          else if (insert_before == df)
858            df = dfp;
859
860          dfp->next_same_super_edge[0] = insert_before;
861          dfp->next_same_super_edge[1]
862            = insert_before->next_same_super_edge[1];
863          dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
864          dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
865          dfp->next_same_dest = dfp->prev_same_dest = dfp;
866          dfp->future = 0;
867          dfp->present = superstate;
868          dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
869          dfp->future_frame.data = 0;
870          dfp->future_frame.data_2 = (void *) dfp;
871          dfp->side_effects_frame.inx
872            = rx->instruction_table[rx_do_side_effects];
873          dfp->side_effects_frame.data = 0;
874          dfp->side_effects_frame.data_2 = (void *) dfp;
875          dfp->effects = future->effects;
876        }
877    }
878  return df;
879}
880
881
882
883/* This constructs a new superstate from its state set.  The only
884 * complexity here is memory management.
885 */
886#ifdef __STDC__
887struct rx_superstate *
888rx_superstate (struct rx *rx,
889               struct rx_superset *set)
890#else
891struct rx_superstate *
892rx_superstate (rx, set)
893     struct rx *rx;
894     struct rx_superset *set;
895#endif
896{
897  struct rx_cache * cache = rx->cache;
898  struct rx_superstate * superstate = 0;
899
900  /* Does the superstate already exist in the cache? */
901  if (set->superstate)
902    {
903      if (set->superstate->rx_id != rx->rx_id)
904        {
905          /* Aha.  It is in the cache, but belongs to a superstate
906           * that refers to an NFA that no longer exists.
907           * (We know it no longer exists because it was evidently
908           *  stored in the same region of memory as the current nfa
909           *  yet it has a different id.)
910           */
911          superstate = set->superstate;
912          if (!superstate->is_semifree)
913            {
914              if (cache->lru_superstate == superstate)
915                {
916                  cache->lru_superstate = superstate->next_recyclable;
917                  if (cache->lru_superstate == superstate)
918                    cache->lru_superstate = 0;
919                }
920              {
921                superstate->next_recyclable->prev_recyclable
922                  = superstate->prev_recyclable;
923                superstate->prev_recyclable->next_recyclable
924                  = superstate->next_recyclable;
925                if (!cache->semifree_superstate)
926                  {
927                    (cache->semifree_superstate
928                     = superstate->next_recyclable
929                     = superstate->prev_recyclable
930                     = superstate);
931                  }
932                else
933                  {
934                    superstate->next_recyclable = cache->semifree_superstate;
935                    superstate->prev_recyclable
936                      = cache->semifree_superstate->prev_recyclable;
937                    superstate->next_recyclable->prev_recyclable
938                      = superstate;
939                    superstate->prev_recyclable->next_recyclable
940                      = superstate;
941                    cache->semifree_superstate = superstate;
942                  }
943                ++cache->semifree_superstates;
944              }
945            }
946          set->superstate = 0;
947          goto handle_cache_miss;
948        }
949      ++cache->hits;
950      superstate = set->superstate;
951
952      rx_refresh_this_superstate (cache, superstate);
953      return superstate;
954    }
955
956 handle_cache_miss:
957
958  /* This point reached only for cache misses. */
959  ++cache->misses;
960#if RX_DEBUG
961  if (rx_debug_trace > 1)
962    {
963      struct rx_superset * setp = set;
964      fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
965      while (setp)
966        {
967          fprintf (stderr, "%d ", setp->id);
968          setp = setp->cdr;
969        }
970      fprintf (stderr, "(%d)\n", set);
971    }
972#endif
973
974  {
975    int superstate_size;
976    superstate_size = (  sizeof (*superstate)
977                       + (  sizeof (struct rx_inx)
978                          * rx->local_cset_size));
979    superstate = ((struct rx_superstate *)
980                  rx_cache_malloc_or_get (cache, superstate_size));
981    ++cache->superstates;
982  }                                                           
983
984  if (!superstate)
985    return 0;
986
987  if (!cache->lru_superstate)
988    (cache->lru_superstate
989     = superstate->next_recyclable
990     = superstate->prev_recyclable
991     = superstate);
992  else
993    {
994      superstate->next_recyclable = cache->lru_superstate;
995      superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
996      (  superstate->prev_recyclable->next_recyclable
997       = superstate->next_recyclable->prev_recyclable
998       = superstate);
999    }
1000  superstate->rx_id = rx->rx_id;
1001  superstate->transition_refs = 0;
1002  superstate->locks = 0;
1003  superstate->is_semifree = 0;
1004  set->superstate = superstate;
1005  superstate->contents = set;
1006  rx_protect_superset (rx, set);
1007  superstate->edges = 0;
1008  {
1009    int x;
1010    /* None of the transitions from this superstate are known yet. */
1011    for (x = 0; x < rx->local_cset_size; ++x)
1012      {
1013        struct rx_inx * ifr = &superstate->transitions[x];
1014        ifr->inx = rx->instruction_table [rx_cache_miss];
1015        ifr->data = ifr->data_2 = 0;
1016      }
1017  }
1018  return superstate;
1019}
1020
1021
1022/* This computes the destination set of one edge of the superstate NFA.
1023 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
1024 * Returns 0 on an allocation failure.
1025 */
1026
1027#ifdef __STDC__
1028static int
1029solve_destination (struct rx *rx, struct rx_distinct_future *df)
1030#else
1031static int
1032solve_destination (rx, df)
1033     struct rx *rx;
1034     struct rx_distinct_future *df;
1035#endif
1036{
1037  struct rx_super_edge *tc = df->edge;
1038  struct rx_superset *nfa_state;
1039  struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
1040  struct rx_superset *solution = nil_set;
1041  struct rx_superstate *dest;
1042
1043  rx_protect_superset (rx, solution);
1044  /* Iterate over all NFA states in the state set of this superstate. */
1045  for (nfa_state = df->present->contents;
1046       nfa_state->car;
1047       nfa_state = nfa_state->cdr)
1048    {
1049      struct rx_nfa_edge *e;
1050      /* Iterate over all edges of each NFA state. */
1051      for (e = nfa_state->car->edges; e; e = e->next)
1052        /* If we find an edge that is labeled with
1053         * the characters we are solving for.....
1054         */
1055        if ((e->type == ne_cset)
1056            && rx_bitset_is_subset (rx->local_cset_size,
1057                                    tc->cset,
1058                                    e->params.cset))
1059          {
1060            struct rx_nfa_state *n = e->dest;
1061            struct rx_possible_future *pf;
1062            /* ....search the partial epsilon closures of the destination
1063             * of that edge for a path that involves the same set of
1064             * side effects we are solving for.
1065             * If we find such a RX_POSSIBLE_FUTURE, we add members to the
1066             * stateset we are computing.
1067             */
1068            for (pf = rx_state_possible_futures (rx, n); pf; pf = pf->next)
1069              if (pf->effects == df->effects)
1070                {
1071                  struct rx_superset * old_sol;
1072                  old_sol = solution;
1073                  solution = rx_superstate_eclosure_union (rx, solution,
1074                                                           pf->destset);
1075                  if (!solution)
1076                    return 0;
1077                  rx_protect_superset (rx, solution);
1078                  rx_release_superset (rx, old_sol);
1079                }
1080          }
1081    }
1082  /* It is possible that the RX_DISTINCT_FUTURE we are working on has
1083   * the empty set of NFA states as its definition.  In that case, this
1084   * is a failure point.
1085   */
1086  if (solution == nil_set)
1087    {
1088      df->future_frame.inx = (void *) rx_backtrack;
1089      df->future_frame.data = 0;
1090      df->future_frame.data_2 = 0;
1091      return 1;
1092    }
1093  dest = rx_superstate (rx, solution);
1094  rx_release_superset (rx, solution);
1095  if (!dest)
1096    return 0;
1097
1098  {
1099    struct rx_distinct_future *dft;
1100    dft = df;
1101    df->prev_same_dest->next_same_dest = 0;
1102    while (dft)
1103      {
1104        dft->future = dest;
1105        dft->future_frame.inx = rx->instruction_table[rx_next_char];
1106        dft->future_frame.data = (void *) dest->transitions;
1107        dft->future_frame.data_2 = (void *) dest->contents->is_final;
1108        dft = dft->next_same_dest;
1109      }
1110    df->prev_same_dest->next_same_dest = df;
1111  }
1112  if (!dest->transition_refs)
1113    dest->transition_refs = df;
1114  else
1115    {
1116      struct rx_distinct_future *dft;
1117      dft = dest->transition_refs->next_same_dest;
1118      dest->transition_refs->next_same_dest = df->next_same_dest;
1119      df->next_same_dest->prev_same_dest = dest->transition_refs;
1120      df->next_same_dest = dft;
1121      dft->prev_same_dest = df;
1122    }
1123  return 1;
1124}
1125
1126
1127/* This takes a superstate and a character, and computes some edges
1128 * from the superstate NFA.  In particular, this computes all edges
1129 * that lead from SUPERSTATE given CHR.   This function also
1130 * computes the set of characters that share this edge set.
1131 * This returns 0 on allocation error.
1132 * The character set and list of edges are returned through
1133 * the paramters CSETOUT and DFOUT.
1134 */
1135
1136#ifdef __STDC__
1137static int
1138compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
1139                          rx_Bitset csetout, struct rx_superstate *superstate,
1140                          unsigned char chr) 
1141#else
1142static int
1143compute_super_edge (rx, dfout, csetout, superstate, chr)
1144     struct rx *rx;
1145     struct rx_distinct_future **dfout;
1146     rx_Bitset csetout;
1147     struct rx_superstate *superstate;
1148     unsigned char chr;
1149#endif
1150{
1151  struct rx_superset *stateset = superstate->contents;
1152
1153  /* To compute the set of characters that share edges with CHR,
1154   * we start with the full character set, and subtract.
1155   */
1156  rx_bitset_universe (rx->local_cset_size, csetout);
1157  *dfout = 0;
1158
1159  /* Iterate over the NFA states in the superstate state-set. */
1160  while (stateset->car)
1161    {
1162      struct rx_nfa_edge *e;
1163      for (e = stateset->car->edges; e; e = e->next)
1164        if (e->type == ne_cset)
1165          {
1166            if (!RX_bitset_member (e->params.cset, chr))
1167              /* An edge that doesn't apply at least tells us some characters
1168               * that don't share the same edge set as CHR.
1169               */
1170              rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
1171            else
1172              {
1173                /* If we find an NFA edge that applies, we make sure there
1174                 * are corresponding edges in the superstate NFA.
1175                 */
1176                {
1177                  struct rx_distinct_future * saved;
1178                  saved = *dfout;
1179                  *dfout = include_futures (rx, *dfout, e->dest, superstate);
1180                  if (!*dfout)
1181                    {
1182                      struct rx_distinct_future * df;
1183                      df = saved;
1184                      if (df)
1185                        df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1186                      while (df)
1187                        {
1188                          struct rx_distinct_future *dft;
1189                          dft = df;
1190                          df = df->next_same_super_edge[0];
1191
1192                          if (dft->future && dft->future->transition_refs == dft)
1193                            {
1194                              dft->future->transition_refs = dft->next_same_dest;
1195                              if (dft->future->transition_refs == dft)
1196                                dft->future->transition_refs = 0;
1197                            }
1198                          dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
1199                          dft->prev_same_dest->next_same_dest = dft->next_same_dest;
1200                          rx_cache_free (rx->cache, sizeof (*dft), (char *)dft);
1201                        }
1202                      return 0;
1203                    }
1204                }
1205                /* We also trim the character set a bit. */
1206                rx_bitset_intersection (rx->local_cset_size,
1207                                        csetout, e->params.cset);
1208              }
1209          }
1210      stateset = stateset->cdr;
1211    }
1212  return 1;
1213}
1214
1215
1216/* This is a constructor for RX_SUPER_EDGE structures.  These are
1217 * wrappers for lists of superstate NFA edges that share character sets labels.
1218 * If a transition class contains more than one rx_distinct_future (superstate
1219 * edge), then it represents a non-determinism in the superstate NFA.
1220 */
1221
1222#ifdef __STDC__
1223static struct rx_super_edge *
1224rx_super_edge (struct rx *rx,
1225               struct rx_superstate *super, rx_Bitset cset,
1226               struct rx_distinct_future *df)
1227#else
1228static struct rx_super_edge *
1229rx_super_edge (rx, super, cset, df)
1230     struct rx *rx;
1231     struct rx_superstate *super;
1232     rx_Bitset cset;
1233     struct rx_distinct_future *df;
1234#endif
1235{
1236  struct rx_super_edge *tc;
1237  int tc_size;
1238
1239  tc_size = (  sizeof (struct rx_super_edge)
1240             + rx_sizeof_bitset (rx->local_cset_size));
1241
1242  tc = ((struct rx_super_edge *)
1243        rx_cache_malloc (rx->cache, tc_size));
1244
1245  if (!tc)
1246    return 0;
1247
1248  tc->next = super->edges;
1249  super->edges = tc;
1250  tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
1251  tc->rx_backtrack_frame.data = 0;
1252  tc->rx_backtrack_frame.data_2 = (void *) tc;
1253  tc->options = df;
1254  tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
1255  rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
1256  if (df)
1257    {
1258      struct rx_distinct_future * dfp = df;
1259      df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
1260      while (dfp)
1261        {
1262          dfp->edge = tc;
1263          dfp = dfp->next_same_super_edge[0];
1264        }
1265      df->next_same_super_edge[1]->next_same_super_edge[0] = df;
1266    }
1267  return tc;
1268}
1269
1270
1271/* There are three kinds of cache miss.  The first occurs when a
1272 * transition is taken that has never been computed during the
1273 * lifetime of the source superstate.  That cache miss is handled by
1274 * calling COMPUTE_SUPER_EDGE.  The second kind of cache miss
1275 * occurs when the destination superstate of a transition doesn't
1276 * exist.  SOLVE_DESTINATION is used to construct the destination superstate.
1277 * Finally, the third kind of cache miss occurs when the destination
1278 * superstate of a transition is in a `semi-free state'.  That case is
1279 * handled by UNFREE_SUPERSTATE.
1280 *
1281 * The function of HANDLE_CACHE_MISS is to figure out which of these
1282 * cases applies.
1283 */
1284
1285#ifdef __STDC__
1286static void
1287install_partial_transition  (struct rx_superstate *super,
1288                             struct rx_inx *answer,
1289                             RX_subset set, int offset)
1290#else
1291static void
1292install_partial_transition  (super, answer, set, offset)
1293     struct rx_superstate *super;
1294     struct rx_inx *answer;
1295     RX_subset set;
1296     int offset;
1297#endif
1298{
1299  int start = offset;
1300  int end = start + 32;
1301  RX_subset pos = 1;
1302  struct rx_inx * transitions = super->transitions;
1303 
1304  while (start < end)
1305    {
1306      if (set & pos)
1307        transitions[start] = *answer;
1308      pos <<= 1;
1309      ++start;
1310    }
1311}
1312
1313
1314#ifdef __STDC__
1315struct rx_inx *
1316rx_handle_cache_miss (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
1317#else
1318struct rx_inx *
1319rx_handle_cache_miss (rx, super, chr, data)
1320     struct rx *rx;
1321     struct rx_superstate *super;
1322     unsigned char chr;
1323     void *data;
1324#endif
1325{
1326  int offset = chr / RX_subset_bits;
1327  struct rx_distinct_future *df = data;
1328
1329  if (!df)                      /* must be the shared_cache_miss_frame */
1330    {
1331      /* Perhaps this is just a transition waiting to be filled. */
1332      struct rx_super_edge *tc;
1333      RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
1334
1335      for (tc = super->edges; tc; tc = tc->next)
1336        if (tc->cset[offset] & mask)
1337          {
1338            struct rx_inx * answer;
1339            df = tc->options;
1340            answer = ((tc->options->next_same_super_edge[0] != tc->options)
1341                      ? &tc->rx_backtrack_frame
1342                      : (df->effects
1343                         ? &df->side_effects_frame
1344                         : &df->future_frame));
1345            install_partial_transition (super, answer,
1346                                        tc->cset [offset], offset * 32);
1347            return answer;
1348          }
1349      /* Otherwise, it's a flushed or  newly encountered edge. */
1350      {
1351        char cset_space[1024];  /* this limit is far from unreasonable */
1352        rx_Bitset trcset;
1353        struct rx_inx *answer;
1354
1355        if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
1356          return 0;             /* If the arbitrary limit is hit, always fail */
1357                                /* cleanly. */
1358        trcset = (rx_Bitset)cset_space;
1359        rx_lock_superstate (rx, super);
1360        if (!compute_super_edge (rx, &df, trcset, super, chr))
1361          {
1362            rx_unlock_superstate (rx, super);
1363            return 0;
1364          }
1365        if (!df)                /* We just computed the fail transition. */
1366          {
1367            static struct rx_inx
1368              shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
1369            answer = &shared_fail_frame;
1370          }
1371        else
1372          {
1373            tc = rx_super_edge (rx, super, trcset, df);
1374            if (!tc)
1375              {
1376                rx_unlock_superstate (rx, super);
1377                return 0;
1378              }
1379            answer = ((tc->options->next_same_super_edge[0] != tc->options)
1380                      ? &tc->rx_backtrack_frame
1381                      : (df->effects
1382                         ? &df->side_effects_frame
1383                         : &df->future_frame));
1384          }
1385        install_partial_transition (super, answer,
1386                                    trcset[offset], offset * 32);
1387        rx_unlock_superstate (rx, super);
1388        return answer;
1389      }
1390    }
1391  else if (df->future) /* A cache miss on an edge with a future? Must be
1392                        * a semi-free destination. */
1393    {                           
1394      if (df->future->is_semifree)
1395        refresh_semifree_superstate (rx->cache, df->future);
1396      return &df->future_frame;
1397    }
1398  else
1399    /* no future superstate on an existing edge */
1400    {
1401      rx_lock_superstate (rx, super);
1402      if (!solve_destination (rx, df))
1403        {
1404          rx_unlock_superstate (rx, super);
1405          return 0;
1406        }
1407      if (!df->effects
1408          && (df->edge->options->next_same_super_edge[0] == df->edge->options))
1409        install_partial_transition (super, &df->future_frame,
1410                                    df->edge->cset[offset], offset * 32);
1411      rx_unlock_superstate (rx, super);
1412      return &df->future_frame;
1413    }
1414}
1415
1416
Note: See TracBrowser for help on using the repository browser.