1 | /* |
---|
2 | * tree.c - a hash style dictionary for user's personal words |
---|
3 | * |
---|
4 | * Pace Willisson, 1983 |
---|
5 | * Hash support added by Geoff Kuenning, 1987 |
---|
6 | */ |
---|
7 | |
---|
8 | #include <stdio.h> |
---|
9 | #include <ctype.h> |
---|
10 | #include <sys/param.h> |
---|
11 | #include "ispell.h" |
---|
12 | #include "config.h" |
---|
13 | |
---|
14 | char *getenv(); |
---|
15 | struct dent *lookup(); |
---|
16 | char *upcase(); |
---|
17 | |
---|
18 | static int cantexpand = 0; /* NZ if an expansion fails */ |
---|
19 | static struct dent *htab = NULL; /* Hash table for our stuff */ |
---|
20 | static int hsize = 0; /* Space available in hash table */ |
---|
21 | static int hcount = 0; /* Number of items in hash table */ |
---|
22 | |
---|
23 | /* |
---|
24 | * Hash table sizes. Prime is probably a good idea, though in truth I |
---|
25 | * whipped the algorithm up on the spot rather than looking it up, so |
---|
26 | * who knows what's really best? If we overflow the table, we just |
---|
27 | * use a double-and-add-1 algorithm. |
---|
28 | * |
---|
29 | * The strange pattern in the table is because this table is sometimes |
---|
30 | * used with huge dictionaries, and we want to get the table bigger fast. |
---|
31 | * 23003 just happens to be such that the original dict.191 will fill |
---|
32 | * the table to just under 70%. 31469 is similarly selected for dict.191 |
---|
33 | * combined with /usr/dict/words. The other numbers are on 10000-word |
---|
34 | * intervals starting at 30000. (The table is still valid if MAXPCT |
---|
35 | * is changed, but the dictionary sizes will no longer fall on neat |
---|
36 | * boundaries). |
---|
37 | */ |
---|
38 | static int goodsizes[] = { |
---|
39 | 53, 223, 907, |
---|
40 | #if ((BIG_DICT * 100) / MAXPCT) <= 23003 |
---|
41 | 23003, /* ~16000 words */ |
---|
42 | #endif |
---|
43 | #if ((BIG_DICT * 100) / MAXPCT) <= 31469 |
---|
44 | 31469, /* ~22000 words */ |
---|
45 | #endif |
---|
46 | #if ((BIG_DICT * 100) / MAXPCT) <= 42859 |
---|
47 | 42859, /* ~30000 words */ |
---|
48 | #endif |
---|
49 | #if ((BIG_DICT * 100) / MAXPCT) <= 57143 |
---|
50 | 57143, /* ~40000 words */ |
---|
51 | #endif |
---|
52 | 71429 /* ~50000 words */ |
---|
53 | }; |
---|
54 | |
---|
55 | struct dent *treeinsert(); |
---|
56 | struct dent *tinsert(); |
---|
57 | struct dent *treelookup(); |
---|
58 | |
---|
59 | static char personaldict[MAXPATHLEN]; |
---|
60 | static FILE *dictf; |
---|
61 | static newwords = 0; |
---|
62 | |
---|
63 | extern char *index (); |
---|
64 | extern struct dent *hashtbl; |
---|
65 | extern int hashsize; |
---|
66 | |
---|
67 | treeinit (p) |
---|
68 | char *p; |
---|
69 | { |
---|
70 | char *h; |
---|
71 | char *orig; |
---|
72 | char buf[BUFSIZ]; |
---|
73 | struct dent *dp; |
---|
74 | |
---|
75 | /* |
---|
76 | ** if p exists and begins with '/' we don't really need HOME, |
---|
77 | ** but it's not very likely that HOME isn't set anyway. |
---|
78 | */ |
---|
79 | orig = p; |
---|
80 | if (p == NULL) |
---|
81 | p = getenv (PDICTVAR); |
---|
82 | if ((h = getenv ("HOME")) == NULL) |
---|
83 | return(-1); |
---|
84 | |
---|
85 | if (p == NULL) |
---|
86 | (void) sprintf(personaldict,"%s/%s",h,DEFPDICT); |
---|
87 | else { |
---|
88 | if (*p == '/') |
---|
89 | (void) strcpy(personaldict,p); |
---|
90 | else { |
---|
91 | /* |
---|
92 | ** The user gave us a relative pathname. How we |
---|
93 | ** interpret it depends on how it was given: |
---|
94 | ** |
---|
95 | ** -p switch: as-is first, then $HOME/name |
---|
96 | ** PDICTVAR: $HOME/name first, then as-is |
---|
97 | **/ |
---|
98 | if (orig == NULL) |
---|
99 | (void) sprintf (personaldict, "%s/%s", h, p); |
---|
100 | else /* -p switch */ |
---|
101 | (void) strcpy (personaldict, p); |
---|
102 | } |
---|
103 | } |
---|
104 | |
---|
105 | if ((dictf = fopen (personaldict, "r")) == NULL) { |
---|
106 | /* The file doesn't exist. */ |
---|
107 | if (p != NULL) { |
---|
108 | /* If pathname is relative, try another place */ |
---|
109 | if (*p != '/') { |
---|
110 | if (orig == NULL) |
---|
111 | (void) strcpy (personaldict, p); |
---|
112 | else /* -p switch */ |
---|
113 | (void) sprintf (personaldict, "%s/%s", h, p); |
---|
114 | dictf = fopen (personaldict, "r"); |
---|
115 | } |
---|
116 | if (dictf == NULL) { |
---|
117 | (void) fprintf (stderr, "Couldn't open "); |
---|
118 | perror (p); |
---|
119 | if (*p != '/') { |
---|
120 | /* |
---|
121 | ** Restore the preferred default, so |
---|
122 | ** that output will go th the right |
---|
123 | ** place. |
---|
124 | */ |
---|
125 | if (orig == NULL) |
---|
126 | (void) sprintf (personaldict, |
---|
127 | "%s/%s", h, p); |
---|
128 | else /* -p switch */ |
---|
129 | (void) strcpy (personaldict, p); |
---|
130 | } |
---|
131 | } |
---|
132 | } |
---|
133 | /* If the name wasn't specified explicitly, we don't object */ |
---|
134 | return(0); |
---|
135 | } |
---|
136 | |
---|
137 | while (fgets (buf, sizeof buf, dictf) != NULL) { |
---|
138 | int len = strlen (buf) - 1; |
---|
139 | |
---|
140 | if (buf [ len ] == '\n') |
---|
141 | buf [ len-- ] = '\0'; |
---|
142 | if ((h = index (buf, '/')) != NULL) |
---|
143 | *h++ = '\0'; |
---|
144 | dp = treeinsert (buf, 1); |
---|
145 | if (h != NULL) { |
---|
146 | while (*h != '\0' && *h != '\n') { |
---|
147 | switch (*h++) { |
---|
148 | case 'D': |
---|
149 | case 'd': |
---|
150 | dp->d_flag = 1; |
---|
151 | break; |
---|
152 | case 'G': |
---|
153 | case 'g': |
---|
154 | dp->g_flag = 1; |
---|
155 | break; |
---|
156 | case 'H': |
---|
157 | case 'h': |
---|
158 | dp->h_flag = 1; |
---|
159 | break; |
---|
160 | case 'J': |
---|
161 | case 'j': |
---|
162 | dp->j_flag = 1; |
---|
163 | break; |
---|
164 | case 'M': |
---|
165 | case 'm': |
---|
166 | dp->m_flag = 1; |
---|
167 | break; |
---|
168 | case 'N': |
---|
169 | case 'n': |
---|
170 | dp->n_flag = 1; |
---|
171 | break; |
---|
172 | case 'P': |
---|
173 | case 'p': |
---|
174 | dp->p_flag = 1; |
---|
175 | break; |
---|
176 | case 'R': |
---|
177 | case 'r': |
---|
178 | dp->r_flag = 1; |
---|
179 | break; |
---|
180 | case 'S': |
---|
181 | case 's': |
---|
182 | dp->s_flag = 1; |
---|
183 | break; |
---|
184 | case 'T': |
---|
185 | case 't': |
---|
186 | dp->t_flag = 1; |
---|
187 | break; |
---|
188 | case 'V': |
---|
189 | case 'v': |
---|
190 | dp->v_flag = 1; |
---|
191 | break; |
---|
192 | case 'X': |
---|
193 | case 'x': |
---|
194 | dp->x_flag = 1; |
---|
195 | break; |
---|
196 | case 'Y': |
---|
197 | case 'y': |
---|
198 | dp->y_flag = 1; |
---|
199 | break; |
---|
200 | case 'Z': |
---|
201 | case 'z': |
---|
202 | dp->z_flag = 1; |
---|
203 | break; |
---|
204 | default: |
---|
205 | fprintf (stderr, |
---|
206 | "Illegal flag in personal dictionary - %c (word %s)\n", |
---|
207 | h[-1], buf); |
---|
208 | break; |
---|
209 | } |
---|
210 | /* Accept old-format dicts with extra slashes */ |
---|
211 | if (*h == '/') |
---|
212 | h++; |
---|
213 | } |
---|
214 | } |
---|
215 | } |
---|
216 | |
---|
217 | (void) fclose (dictf); |
---|
218 | |
---|
219 | newwords = 0; |
---|
220 | |
---|
221 | if (!lflag && !aflag && access (personaldict, 2) < 0) |
---|
222 | printf ("Warning: Cannot update personal dictionary (%s)\r\n", personaldict); |
---|
223 | } |
---|
224 | |
---|
225 | treeprint () |
---|
226 | { |
---|
227 | register int i; |
---|
228 | register struct dent *dp; |
---|
229 | register struct dent *cp; |
---|
230 | |
---|
231 | printf ("("); |
---|
232 | for (i = 0; i < hsize; i++) { |
---|
233 | dp = &htab[i]; |
---|
234 | if (dp->used) { |
---|
235 | for (cp = dp; cp != NULL; cp = cp->next) |
---|
236 | printf ("%s ", cp->word); |
---|
237 | } |
---|
238 | } |
---|
239 | printf (")"); |
---|
240 | } |
---|
241 | |
---|
242 | struct dent * |
---|
243 | treeinsert (word, keep) |
---|
244 | char *word; |
---|
245 | { |
---|
246 | register int i; |
---|
247 | struct dent *dp; |
---|
248 | struct dent *olddp; |
---|
249 | struct dent *oldhtab; |
---|
250 | int oldhsize; |
---|
251 | char nword[BUFSIZ]; |
---|
252 | |
---|
253 | (void) strcpy (nword, word); |
---|
254 | (void) upcase (nword); |
---|
255 | if ((dp = lookup (nword, strlen (nword), 0)) != NULL) { |
---|
256 | if (keep) |
---|
257 | dp->keep = 1; |
---|
258 | return dp; |
---|
259 | } |
---|
260 | /* |
---|
261 | * Expand hash table when it is MAXPCT % full. |
---|
262 | */ |
---|
263 | if (!cantexpand && (hcount * 100) / MAXPCT >= hsize) { |
---|
264 | oldhsize = hsize; |
---|
265 | oldhtab = htab; |
---|
266 | for (i = 0; i < sizeof goodsizes / sizeof (goodsizes[0]); i++) |
---|
267 | if (goodsizes[i] > hsize) |
---|
268 | break; |
---|
269 | if (i >= sizeof goodsizes / sizeof goodsizes[0]) |
---|
270 | hsize += hsize + 1; |
---|
271 | else |
---|
272 | hsize = goodsizes[i]; |
---|
273 | htab = (struct dent *) calloc (hsize, sizeof (struct dent)); |
---|
274 | if (htab == NULL) { |
---|
275 | (void) fprintf (stderr, |
---|
276 | "Ran out of space for personal dictionary\n"); |
---|
277 | /* |
---|
278 | * Try to continue anyway, since our overflow |
---|
279 | * algorithm can handle an overfull (100%+) table, |
---|
280 | * and the malloc very likely failed because we |
---|
281 | * already have such a huge table, so small mallocs |
---|
282 | * for overflow entries will still work. |
---|
283 | */ |
---|
284 | if (oldhtab == NULL) |
---|
285 | exit (1); /* No old table, can't go on */ |
---|
286 | (void) fprintf (stderr, |
---|
287 | "Continuing anyway (with reduced performance).\n"); |
---|
288 | cantexpand = 1; /* Suppress further messages */ |
---|
289 | hsize = oldhsize; /* Put this back how the were */ |
---|
290 | htab = oldhtab; /* ... */ |
---|
291 | newwords = 1; /* And pretend it worked */ |
---|
292 | return tinsert (nword, (struct dent *) NULL, keep); |
---|
293 | } |
---|
294 | /* |
---|
295 | * Re-insert old entries into new table |
---|
296 | */ |
---|
297 | for (i = 0; i < oldhsize; i++) { |
---|
298 | dp = &oldhtab[i]; |
---|
299 | if (oldhtab[i].used) { |
---|
300 | (void) tinsert ((char *) NULL, dp, 0); |
---|
301 | dp = dp->next; |
---|
302 | while (dp != NULL) { |
---|
303 | (void) tinsert ((char *) NULL, dp, 0); |
---|
304 | olddp = dp; |
---|
305 | dp = dp->next; |
---|
306 | free ((char *) olddp); |
---|
307 | } |
---|
308 | } |
---|
309 | } |
---|
310 | if (oldhtab != NULL) |
---|
311 | free ((char *) oldhtab); |
---|
312 | } |
---|
313 | newwords |= keep; |
---|
314 | return tinsert (nword, (struct dent *) NULL, keep); |
---|
315 | } |
---|
316 | |
---|
317 | static |
---|
318 | struct dent * |
---|
319 | tinsert (word, proto, keep) |
---|
320 | char *word; /* One of word/proto must be null */ |
---|
321 | struct dent *proto; |
---|
322 | { |
---|
323 | int hcode; |
---|
324 | register struct dent *hp; /* Next trial entry in hash table */ |
---|
325 | struct dent *php; /* Previous value of hp, for chaining */ |
---|
326 | |
---|
327 | if (word == NULL) |
---|
328 | word = proto->word; |
---|
329 | hcode = hash (word, strlen (word), hsize); |
---|
330 | php = NULL; |
---|
331 | hp = &htab[hcode]; |
---|
332 | if (hp->used) { |
---|
333 | while (hp != NULL) { |
---|
334 | if (strcmp (word, hp->word) == 0) { |
---|
335 | if (keep) |
---|
336 | hp->keep = 1; |
---|
337 | return hp; |
---|
338 | } |
---|
339 | php = hp; |
---|
340 | hp = hp->next; |
---|
341 | } |
---|
342 | hp = (struct dent *) calloc (1, sizeof (struct dent)); |
---|
343 | if (hp == NULL) { |
---|
344 | (void) fprintf (stderr, |
---|
345 | "Ran out of space for personal dictionary\n"); |
---|
346 | exit (1); |
---|
347 | } |
---|
348 | } |
---|
349 | if (proto != NULL) { |
---|
350 | *hp = *proto; |
---|
351 | if (php != NULL) |
---|
352 | php->next = hp; |
---|
353 | hp->next = NULL; |
---|
354 | return &htab[hcode]; |
---|
355 | } else { |
---|
356 | if (php != NULL) |
---|
357 | php->next = hp; |
---|
358 | hp->word = (char *) malloc (strlen (word) + 1); |
---|
359 | if (hp->word == NULL) { |
---|
360 | (void) fprintf (stderr, |
---|
361 | "Ran out of space for personal dictionary\n"); |
---|
362 | exit (1); |
---|
363 | } |
---|
364 | hp->used = 1; |
---|
365 | hp->next = NULL; |
---|
366 | hp->d_flag = 0; |
---|
367 | hp->g_flag = 0; |
---|
368 | hp->h_flag = 0; |
---|
369 | hp->j_flag = 0; |
---|
370 | hp->m_flag = 0; |
---|
371 | hp->n_flag = 0; |
---|
372 | hp->p_flag = 0; |
---|
373 | hp->r_flag = 0; |
---|
374 | hp->s_flag = 0; |
---|
375 | hp->t_flag = 0; |
---|
376 | hp->v_flag = 0; |
---|
377 | hp->x_flag = 0; |
---|
378 | hp->y_flag = 0; |
---|
379 | hp->z_flag = 0; |
---|
380 | (void) strcpy (hp->word, word); |
---|
381 | hp->keep = keep; |
---|
382 | hcount++; |
---|
383 | return (hp); |
---|
384 | } |
---|
385 | } |
---|
386 | |
---|
387 | struct dent * |
---|
388 | treelookup (word) |
---|
389 | char *word; |
---|
390 | { |
---|
391 | int hcode; |
---|
392 | register struct dent *hp; |
---|
393 | char nword[BUFSIZ]; |
---|
394 | |
---|
395 | if (hsize <= 0) |
---|
396 | return NULL; |
---|
397 | (void) strcpy (nword, word); |
---|
398 | hcode = hash (nword, strlen (nword), hsize); |
---|
399 | hp = &htab[hcode]; |
---|
400 | while (hp != NULL && hp->used) { |
---|
401 | if (strcmp (nword, hp->word) == 0) |
---|
402 | break; |
---|
403 | hp = hp->next; |
---|
404 | } |
---|
405 | if (hp != NULL && hp->used) |
---|
406 | return hp; |
---|
407 | else |
---|
408 | return NULL; |
---|
409 | } |
---|
410 | |
---|
411 | treeoutput () |
---|
412 | { |
---|
413 | if (newwords == 0) |
---|
414 | return(0); |
---|
415 | |
---|
416 | if ((dictf = fopen (personaldict, "w")) == NULL) { |
---|
417 | fprintf (stderr, "Can't create %s\r\n", personaldict); |
---|
418 | return(-1); |
---|
419 | } |
---|
420 | |
---|
421 | toutput1 (); |
---|
422 | newwords = 0; |
---|
423 | |
---|
424 | (void) fclose (dictf); |
---|
425 | } |
---|
426 | |
---|
427 | static |
---|
428 | toutput1 () |
---|
429 | { |
---|
430 | register struct dent *cent; /* Current entry */ |
---|
431 | register struct dent *lent; /* Linked entry */ |
---|
432 | |
---|
433 | for (cent = htab; cent - htab < hsize; cent++) { |
---|
434 | for (lent = cent; lent != NULL; lent = lent->next) { |
---|
435 | if (lent->used && lent->keep) |
---|
436 | toutput2 (lent); |
---|
437 | } |
---|
438 | } |
---|
439 | for (cent = hashtbl, lent = hashtbl + hashsize; |
---|
440 | cent < lent; |
---|
441 | cent++) { |
---|
442 | if (cent->used && cent->keep) |
---|
443 | toutput2 (cent); |
---|
444 | } |
---|
445 | } |
---|
446 | |
---|
447 | static int hasslash; |
---|
448 | |
---|
449 | static |
---|
450 | toutput2 (cent) |
---|
451 | register struct dent *cent; |
---|
452 | { |
---|
453 | |
---|
454 | hasslash = 0; |
---|
455 | fprintf (dictf, "%s", cent->word); |
---|
456 | if (cent->d_flag) |
---|
457 | flagout ('D'); |
---|
458 | if (cent->g_flag) |
---|
459 | flagout ('G'); |
---|
460 | if (cent->h_flag) |
---|
461 | flagout ('H'); |
---|
462 | if (cent->j_flag) |
---|
463 | flagout ('J'); |
---|
464 | if (cent->m_flag) |
---|
465 | flagout ('M'); |
---|
466 | if (cent->n_flag) |
---|
467 | flagout ('N'); |
---|
468 | if (cent->p_flag) |
---|
469 | flagout ('P'); |
---|
470 | if (cent->r_flag) |
---|
471 | flagout ('R'); |
---|
472 | if (cent->s_flag) |
---|
473 | flagout ('S'); |
---|
474 | if (cent->t_flag) |
---|
475 | flagout ('T'); |
---|
476 | if (cent->v_flag) |
---|
477 | flagout ('V'); |
---|
478 | if (cent->x_flag) |
---|
479 | flagout ('X'); |
---|
480 | if (cent->y_flag) |
---|
481 | flagout ('Y'); |
---|
482 | if (cent->z_flag) |
---|
483 | flagout ('Z'); |
---|
484 | fprintf (dictf, "\n"); |
---|
485 | } |
---|
486 | |
---|
487 | static |
---|
488 | flagout (flag) |
---|
489 | { |
---|
490 | if (!hasslash) |
---|
491 | (void) putc ('/', dictf); |
---|
492 | hasslash = 1; |
---|
493 | (void) putc (flag, dictf); |
---|
494 | } |
---|
495 | |
---|
496 | char * |
---|
497 | upcase (s) |
---|
498 | register char *s; |
---|
499 | { |
---|
500 | register char *os = s; |
---|
501 | |
---|
502 | while (*s) { |
---|
503 | if (mylower (*s)) |
---|
504 | *s = toupper (*s); |
---|
505 | s++; |
---|
506 | } |
---|
507 | return (os); |
---|
508 | } |
---|