source: trunk/third/ispell/lookup.c @ 10334

Revision 10334, 13.7 KB checked in by ghudson, 27 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r10333, which included commits to RCS files with non-trunk default branches.
Line 
1#ifndef lint
2static char Rcs_Id[] =
3    "$Id: lookup.c,v 1.1.1.1 1997-09-03 21:08:12 ghudson Exp $";
4#endif
5
6/*
7 * lookup.c - see if a word appears in the dictionary
8 *
9 * Pace Willisson, 1983
10 *
11 * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All modifications to the source code must be clearly marked as
24 *    such.  Binary redistributions based on modified source code
25 *    must be clearly marked as modified versions in the documentation
26 *    and/or other materials provided with the distribution.
27 * 4. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgment:
29 *      This product includes software developed by Geoff Kuenning and
30 *      other unpaid contributors.
31 * 5. The name of Geoff Kuenning may not be used to endorse or promote
32 *    products derived from this software without specific prior
33 *    written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 */
47
48/*
49 * $Log: not supported by cvs2svn $
50 * Revision 1.42  1995/01/08  23:23:42  geoff
51 * Support MSDOS_BINARY_OPEN when opening the hash file to read it in.
52 *
53 * Revision 1.41  1994/01/25  07:11:51  geoff
54 * Get rid of all old RCS log lines in preparation for the 3.1 release.
55 *
56 */
57
58#include "config.h"
59#include "ispell.h"
60#include "proto.h"
61#include "msgs.h"
62
63int             linit P ((void));
64#ifdef INDEXDUMP
65static void     dumpindex P ((struct flagptr * indexp, int depth));
66#endif /* INDEXDUMP */
67struct dent *   lookup P ((ichar_t * word, int dotree));
68
69static          inited = 0;
70
71int linit ()
72    {
73    int                 hashfd;
74    register int        i;
75    register struct dent * dp;
76    struct flagent *    entry;
77    struct flagptr *    ind;
78    int                 nextchar;
79    int                 viazero;
80    register ichar_t *  cp;
81
82    if (inited)
83        return 0;
84
85    if ((hashfd = open (hashname, 0 | MSDOS_BINARY_OPEN)) < 0)
86        {
87        (void) fprintf (stderr, CANT_OPEN, hashname);
88        return (-1);
89        }
90
91    hashsize = read (hashfd, (char *) &hashheader, sizeof hashheader);
92    if (hashsize < sizeof hashheader)
93        {
94        if (hashsize < 0)
95            (void) fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
96        else if (hashsize == 0)
97            (void) fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
98        else
99            (void) fprintf (stderr,
100              LOOKUP_C_SHORT_HASH (hashname, hashsize,
101                (int) sizeof hashheader));
102        return (-1);
103        }
104    else if (hashheader.magic != MAGIC)
105        {
106        (void) fprintf (stderr,
107          LOOKUP_C_BAD_MAGIC (hashname, (unsigned int) MAGIC,
108            (unsigned int) hashheader.magic));
109        return (-1);
110        }
111    else if (hashheader.magic2 != MAGIC)
112        {
113        (void) fprintf (stderr,
114          LOOKUP_C_BAD_MAGIC2 (hashname, (unsigned int) MAGIC,
115            (unsigned int) hashheader.magic2));
116        return (-1);
117        }
118    else if (hashheader.compileoptions != COMPILEOPTIONS
119      ||  hashheader.maxstringchars != MAXSTRINGCHARS
120      ||  hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
121        {
122        (void) fprintf (stderr,
123          LOOKUP_C_BAD_OPTIONS ((unsigned int) hashheader.compileoptions,
124            hashheader.maxstringchars, hashheader.maxstringcharlen,
125            (unsigned int) COMPILEOPTIONS, MAXSTRINGCHARS, MAXSTRINGCHARLEN));
126        return (-1);
127        }
128    if (nodictflag)
129        {
130        /*
131         * Dictionary is not needed - create an empty dummy table.  We
132         * actually have to have one entry since the hash
133         * algorithm involves a divide by the table size
134         * (actually modulo, but zero is still unacceptable).
135         * So we create an empty entry.
136         */
137        hashsize = 1;           /* This prevents divides by zero */
138        hashtbl = (struct dent *) calloc (1, sizeof (struct dent));
139        if (hashtbl == NULL)
140            {
141            (void) fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
142            return (-1);
143            }
144        hashtbl[0].word = NULL;
145        hashtbl[0].next = NULL;
146        hashtbl[0].flagfield &= ~(USED | KEEP);
147        /* The flag bits don't matter, but calloc cleared them. */
148        hashstrings = (char *) malloc ((unsigned) hashheader.lstringsize);
149        }
150    else
151        {
152        hashtbl =
153         (struct dent *)
154            malloc ((unsigned) hashheader.tblsize * sizeof (struct dent));
155        hashsize = hashheader.tblsize;
156        hashstrings = (char *) malloc ((unsigned) hashheader.stringsize);
157        }
158    numsflags = hashheader.stblsize;
159    numpflags = hashheader.ptblsize;
160    sflaglist = (struct flagent *)
161      malloc ((numsflags + numpflags) * sizeof (struct flagent));
162    if (hashtbl == NULL  ||  hashstrings == NULL  ||  sflaglist == NULL)
163        {
164        (void) fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
165        return (-1);
166        }
167    pflaglist = sflaglist + numsflags;
168
169    if (nodictflag)
170        {
171        /*
172         * Read just the strings for the language table, and
173         * skip over the rest of the strings and all of the
174         * hash table.
175         */
176        if (read (hashfd, hashstrings, (unsigned) hashheader.lstringsize)
177          != hashheader.lstringsize)
178            {
179            (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT);
180            return (-1);
181            }
182        (void) lseek (hashfd,
183          (long) hashheader.stringsize - (long) hashheader.lstringsize
184            + (long) hashheader.tblsize * (long) sizeof (struct dent),
185          1);
186        }
187    else
188        {
189        if (read (hashfd, hashstrings, (unsigned) hashheader.stringsize)
190            != hashheader.stringsize
191          ||  read (hashfd, (char *) hashtbl,
192              (unsigned) hashheader.tblsize * sizeof (struct dent))
193            != hashheader.tblsize * sizeof (struct dent))
194            {
195            (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT);
196            return (-1);
197            }
198        }
199    if (read (hashfd, (char *) sflaglist,
200        (unsigned) (numsflags + numpflags) * sizeof (struct flagent))
201      != (numsflags + numpflags) * sizeof (struct flagent))
202        {
203        (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT);
204        return (-1);
205        }
206    (void) close (hashfd);
207
208    if (!nodictflag)
209        {
210        for (i = hashsize, dp = hashtbl;  --i >= 0;  dp++)
211            {
212            if (dp->word == (char *) -1)
213                dp->word = NULL;
214            else
215                dp->word = &hashstrings [ (int)(dp->word) ];
216            if (dp->next == (struct dent *) -1)
217                dp->next = NULL;
218            else
219                dp->next = &hashtbl [ (int)(dp->next) ];
220            }
221        }
222
223    for (i = numsflags + numpflags, entry = sflaglist; --i >= 0; entry++)
224        {
225        if (entry->stripl)
226            entry->strip = (ichar_t *) &hashstrings[(int) entry->strip];
227        else
228            entry->strip = NULL;
229        if (entry->affl)
230            entry->affix = (ichar_t *) &hashstrings[(int) entry->affix];
231        else
232            entry->affix = NULL;
233        }
234    /*
235    ** Warning - 'entry' and 'i' are reset in the body of the loop
236    ** below.  Don't try to optimize it by (e.g.) moving the decrement
237    ** of i into the loop condition.
238    */
239    for (i = numsflags, entry = sflaglist;  i > 0;  i--, entry++)
240        {
241        if (entry->affl == 0)
242            {
243            cp = NULL;
244            ind = &sflagindex[0];
245            viazero = 1;
246            }
247        else
248            {
249            cp = entry->affix + entry->affl - 1;
250            ind = &sflagindex[*cp];
251            viazero = 0;
252            while (ind->numents == 0  &&  ind->pu.fp != NULL)
253                {
254                if (cp == entry->affix)
255                    {
256                    ind = &ind->pu.fp[0];
257                    viazero = 1;
258                    }
259                else
260                    {
261                    ind = &ind->pu.fp[*--cp];
262                    viazero = 0;
263                    }
264                }
265            }
266        if (ind->numents == 0)
267            ind->pu.ent = entry;
268        ind->numents++;
269        /*
270        ** If this index entry has more than MAXSEARCH flags in
271        ** it, we will split it into subentries to reduce the
272        ** searching.  However, the split doesn't make sense in
273        ** two cases:  (a) if we are already at the end of the
274        ** current affix, or (b) if all the entries in the list
275        ** have identical affixes.  Since the list is sorted, (b)
276        ** is true if the first and last affixes in the list
277        ** are identical.
278        */
279        if (!viazero  &&  ind->numents >= MAXSEARCH
280          &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
281            {
282            /* Sneaky trick:  back up and reprocess */
283            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
284            i = numsflags - (entry - sflaglist);
285            ind->pu.fp =
286              (struct flagptr *)
287                calloc ((unsigned) (SET_SIZE + hashheader.nstrchars),
288                  sizeof (struct flagptr));
289            if (ind->pu.fp == NULL)
290                {
291                (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
292                return (-1);
293                }
294            ind->numents = 0;
295            }
296        }
297    /*
298    ** Warning - 'entry' and 'i' are reset in the body of the loop
299    ** below.  Don't try to optimize it by (e.g.) moving the decrement
300    ** of i into the loop condition.
301    */
302    for (i = numpflags, entry = pflaglist;  i > 0;  i--, entry++)
303        {
304        if (entry->affl == 0)
305            {
306            cp = NULL;
307            ind = &pflagindex[0];
308            viazero = 1;
309            }
310        else
311            {
312            cp = entry->affix;
313            ind = &pflagindex[*cp++];
314            viazero = 0;
315            while (ind->numents == 0  &&  ind->pu.fp != NULL)
316                {
317                if (*cp == 0)
318                    {
319                    ind = &ind->pu.fp[0];
320                    viazero = 1;
321                    }
322                else
323                    {
324                    ind = &ind->pu.fp[*cp++];
325                    viazero = 0;
326                    }
327                }
328            }
329        if (ind->numents == 0)
330            ind->pu.ent = entry;
331        ind->numents++;
332        /*
333        ** If this index entry has more than MAXSEARCH flags in
334        ** it, we will split it into subentries to reduce the
335        ** searching.  However, the split doesn't make sense in
336        ** two cases:  (a) if we are already at the end of the
337        ** current affix, or (b) if all the entries in the list
338        ** have identical affixes.  Since the list is sorted, (b)
339        ** is true if the first and last affixes in the list
340        ** are identical.
341        */
342        if (!viazero  &&  ind->numents >= MAXSEARCH
343          &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
344            {
345            /* Sneaky trick:  back up and reprocess */
346            entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
347            i = numpflags - (entry - pflaglist);
348            ind->pu.fp =
349              (struct flagptr *) calloc (SET_SIZE + hashheader.nstrchars,
350                sizeof (struct flagptr));
351            if (ind->pu.fp == NULL)
352                {
353                (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
354                return (-1);
355                }
356            ind->numents = 0;
357            }
358        }
359#ifdef INDEXDUMP
360    (void) fprintf (stderr, "Prefix index table:\n");
361    dumpindex (pflagindex, 0);
362    (void) fprintf (stderr, "Suffix index table:\n");
363    dumpindex (sflagindex, 0);
364#endif
365    if (hashheader.nstrchartype == 0)
366        chartypes = NULL;
367    else
368        {
369        chartypes = (struct strchartype *)
370          malloc (hashheader.nstrchartype * sizeof (struct strchartype));
371        if (chartypes == NULL)
372            {
373            (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
374            return (-1);
375            }
376        for (i = 0, nextchar = hashheader.strtypestart;
377          i < hashheader.nstrchartype;
378          i++)
379            {
380            chartypes[i].name = &hashstrings[nextchar];
381            nextchar += strlen (chartypes[i].name) + 1;
382            chartypes[i].deformatter = &hashstrings[nextchar];
383            nextchar += strlen (chartypes[i].deformatter) + 1;
384            chartypes[i].suffixes = &hashstrings[nextchar];
385            while (hashstrings[nextchar] != '\0')
386                nextchar += strlen (&hashstrings[nextchar]) + 1;
387            nextchar++;
388            }
389        }
390    inited = 1;
391    return (0);
392    }
393
#ifdef INDEXDUMP
/*
 * dumpindex - debugging aid: print one level of an affix index on stderr.
 *
 * indexp points at an array of SET_SIZE + hashheader.nstrchars flagptr
 * slots.  A slot with numents == 0 and a non-NULL pu.fp is a sub-index
 * and is dumped recursively, one level deeper.  A slot with a nonzero
 * numents lists that many flag entries starting at pu.ent.  All other
 * slots are skipped.
 *
 * depth is the nesting level; each line is indented by that many spaces.
 * Entry numbers are printed relative to sflaglist.
 */
static void dumpindex (indexp, depth)
    register struct flagptr *   indexp;
    register int                depth;
    {
    register int                i;
    int                         j;
    int                         k;
    char                        stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

    for (i = 0;  i < SET_SIZE + hashheader.nstrchars;  i++, indexp++)
        {
        if (indexp->numents == 0  &&  indexp->pu.fp == NULL)
            continue;                   /* Unused slot, nothing to show */

        /* Indent, then label the slot by its character code. */
        for (j = depth;  --j >= 0;  )
            (void) putc (' ', stderr);
        if (i >= ' '  &&  i <= '~')
            (void) putc (i, stderr);
        else
            (void) fprintf (stderr, "0x%x", i);

        if (indexp->numents == 0)
            {
            /* Sub-index:  finish the label line and recurse */
            (void) putc ('\n', stderr);
            dumpindex (indexp->pu.fp, depth + 1);
            }
        else
            {
            (void) fprintf (stderr, " -> %d entries\n", indexp->numents);
            for (k = 0;  k < indexp->numents;  k++)
                {
                for (j = depth;  --j >= 0;  )
                    (void) putc (' ', stderr);
                /*
                 * A pointer difference is not an int, so it is cast
                 * explicitly to match the %d conversion.
                 */
                if (indexp->pu.ent[k].stripl)
                    {
                    (void) ichartostr (stripbuf, indexp->pu.ent[k].strip,
                      sizeof stripbuf, 1);
                    (void) fprintf (stderr, "     entry %d (-%s,%s)\n",
                      (int) (&indexp->pu.ent[k] - sflaglist),
                      stripbuf,
                      indexp->pu.ent[k].affl
                        ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-");
                    }
                else
                    (void) fprintf (stderr, "     entry %d (%s)\n",
                      (int) (&indexp->pu.ent[k] - sflaglist),
                      ichartosstr (indexp->pu.ent[k].affix, 1));
                }
            }
        }
    }
#endif /* INDEXDUMP */
449
450/* n is length of s */
451struct dent * lookup (s, dotree)
452    register ichar_t *          s;
453    int                         dotree;
454    {
455    register struct dent *      dp;
456    register char *             s1;
457    char                        schar[INPUTWORDLEN + MAXAFFIXLEN];
458
459    dp = &hashtbl[hash (s, hashsize)];
460    if (ichartostr (schar, s, sizeof schar, 1))
461        (void) fprintf (stderr, WORD_TOO_LONG (schar));
462    for (  ;  dp != NULL;  dp = dp->next)
463        {
464        /* quick strcmp, but only for equality */
465        s1 = dp->word;
466        if (s1  &&  s1[0] == schar[0]  &&  strcmp (s1 + 1, schar + 1) == 0)
467            return dp;
468#ifndef NO_CAPITALIZATION_SUPPORT
469        while (dp->flagfield & MOREVARIANTS)    /* Skip variations */
470            dp = dp->next;
471#endif
472        }
473    if (dotree)
474        {
475        dp = treelookup (s);
476        return dp;
477        }
478    else
479        return NULL;
480    }
Note: See TracBrowser for help on using the repository browser.