1 | #ifndef lint |
---|
2 | static char Rcs_Id[] = |
---|
3 | "$Id: buildhash.c,v 1.1.1.1 1997-09-03 21:08:11 ghudson Exp $"; |
---|
4 | #endif |
---|
5 | |
---|
6 | #define MAIN |
---|
7 | |
---|
8 | /* |
---|
9 | * buildhash.c - make a hash table for okspell |
---|
10 | * |
---|
11 | * Pace Willisson, 1983 |
---|
12 | * |
---|
13 | * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA |
---|
14 | * All rights reserved. |
---|
15 | * |
---|
16 | * Redistribution and use in source and binary forms, with or without |
---|
17 | * modification, are permitted provided that the following conditions |
---|
18 | * are met: |
---|
19 | * |
---|
20 | * 1. Redistributions of source code must retain the above copyright |
---|
21 | * notice, this list of conditions and the following disclaimer. |
---|
22 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
23 | * notice, this list of conditions and the following disclaimer in the |
---|
24 | * documentation and/or other materials provided with the distribution. |
---|
25 | * 3. All modifications to the source code must be clearly marked as |
---|
26 | * such. Binary redistributions based on modified source code |
---|
27 | * must be clearly marked as modified versions in the documentation |
---|
28 | * and/or other materials provided with the distribution. |
---|
29 | * 4. All advertising materials mentioning features or use of this software |
---|
30 | * must display the following acknowledgment: |
---|
31 | * This product includes software developed by Geoff Kuenning and |
---|
32 | * other unpaid contributors. |
---|
33 | * 5. The name of Geoff Kuenning may not be used to endorse or promote |
---|
34 | * products derived from this software without specific prior |
---|
35 | * written permission. |
---|
36 | * |
---|
37 | * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
---|
38 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
39 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
40 | * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
---|
41 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
42 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
43 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
44 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
45 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
46 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
47 | * SUCH DAMAGE. |
---|
48 | */ |
---|
49 | |
---|
50 | /* |
---|
51 | * $Log: not supported by cvs2svn $ |
---|
52 | * Revision 1.64 1995/01/08 23:23:26 geoff |
---|
53 | * Make the various file suffixes configurable for DOS purposes. |
---|
54 | * |
---|
55 | * Revision 1.63 1994/10/26 05:12:25 geoff |
---|
56 | * Get rid of some duplicate declarations. |
---|
57 | * |
---|
58 | * Revision 1.62 1994/07/28 05:11:33 geoff |
---|
59 | * Log message for previous revision: distinguish a zero count from a bad |
---|
60 | * count file. |
---|
61 | * |
---|
62 | * Revision 1.61 1994/07/28 04:53:30 geoff |
---|
63 | * |
---|
64 | * Revision 1.60 1994/01/25 07:11:18 geoff |
---|
65 | * Get rid of all old RCS log lines in preparation for the 3.1 release. |
---|
66 | * |
---|
67 | */ |
---|
68 | |
---|
69 | #include "config.h" |
---|
70 | #include "ispell.h" |
---|
71 | #include "proto.h" |
---|
72 | #include "msgs.h" |
---|
73 | #include "version.h" |
---|
74 | #include <ctype.h> |
---|
75 | #include <sys/stat.h> |
---|
76 | |
---|
/*
 * Prototypes for the routines defined in this file.  The P(()) wrapper
 * is the project's macro for declaring argument lists.
 */
int main P ((int argc, char * argv[]));
static void output P ((void));
static void filltable P ((void));
VOID * mymalloc P ((unsigned int size));
VOID * myrealloc P ((VOID * ptr, unsigned int size,
unsigned int oldsize));
void myfree P ((VOID * ptr));
static void readdict P ((void));
static void newcount P ((void));

#define NSTAT 100 /* Size probe-statistics table */

struct stat dstat; /* Result of stat-ing dict file */
struct stat cstat; /* Result of stat-ing count file */

/* The next three point directly at command-line arguments (set in main) */
char * Dfile; /* Name of dictionary file */
char * Hfile; /* Name of hash (output) file */
char * Lfile; /* Name of language file */

/* The next two are built in main from Dfile plus a configured suffix */
char Cfile[MAXPATHLEN]; /* Name of count file */
char Sfile[MAXPATHLEN]; /* Name of statistics file */

static int silent = 0; /* NZ to suppress count reports */
---|
100 | |
---|
101 | int main (argc, argv) |
---|
102 | int argc; |
---|
103 | char * argv[]; |
---|
104 | { |
---|
105 | int avg; |
---|
106 | FILE * countf; |
---|
107 | FILE * statf; |
---|
108 | int stats[NSTAT]; |
---|
109 | int i; |
---|
110 | int j; |
---|
111 | |
---|
112 | while (argc > 1 && *argv[1] == '-') |
---|
113 | { |
---|
114 | argc--; |
---|
115 | argv++; |
---|
116 | switch (argv[0][1]) |
---|
117 | { |
---|
118 | case 's': |
---|
119 | silent = 1; |
---|
120 | break; |
---|
121 | } |
---|
122 | } |
---|
123 | if (argc == 4) |
---|
124 | { |
---|
125 | Dfile = argv[1]; |
---|
126 | Lfile = argv[2]; |
---|
127 | Hfile = argv[3]; |
---|
128 | } |
---|
129 | else |
---|
130 | { |
---|
131 | (void) fprintf (stderr, BHASH_C_USAGE); |
---|
132 | return 1; |
---|
133 | } |
---|
134 | |
---|
135 | if (yyopen (Lfile)) /* Open the language file */ |
---|
136 | return 1; |
---|
137 | yyinit (); /* Set up for the parse */ |
---|
138 | if (yyparse ()) /* Parse the language tables */ |
---|
139 | exit (1); |
---|
140 | |
---|
141 | (void) sprintf (Cfile, "%s%s", Dfile, COUNTSUFFIX); |
---|
142 | (void) sprintf (Sfile, "%s%s", Dfile, STATSUFFIX); |
---|
143 | |
---|
144 | if (stat (Dfile, &dstat) < 0) |
---|
145 | { |
---|
146 | (void) fprintf (stderr, BHASH_C_NO_DICT, Dfile); |
---|
147 | exit (1); |
---|
148 | } |
---|
149 | if (stat (Cfile, &cstat) < 0 || dstat.st_mtime > cstat.st_mtime) |
---|
150 | newcount (); |
---|
151 | |
---|
152 | if ((countf = fopen (Cfile, "r")) == NULL) |
---|
153 | { |
---|
154 | (void) fprintf (stderr, BHASH_C_NO_COUNT); |
---|
155 | exit (1); |
---|
156 | } |
---|
157 | hashsize = 0; |
---|
158 | if (fscanf (countf, "%d", &hashsize) != 1 || fclose (countf) == EOF) |
---|
159 | { |
---|
160 | (void) fprintf (stderr, BHASH_C_BAD_COUNT); |
---|
161 | exit (1); |
---|
162 | } |
---|
163 | if (hashsize == 0) |
---|
164 | { |
---|
165 | (void) fprintf (stderr, BHASH_C_ZERO_COUNT); |
---|
166 | exit (1); |
---|
167 | } |
---|
168 | readdict (); |
---|
169 | |
---|
170 | if ((statf = fopen (Sfile, "w")) == NULL) |
---|
171 | { |
---|
172 | (void) fprintf (stderr, CANT_CREATE, Sfile); |
---|
173 | exit (1); |
---|
174 | } |
---|
175 | |
---|
176 | for (i = 0; i < NSTAT; i++) |
---|
177 | stats[i] = 0; |
---|
178 | for (i = 0; i < hashsize; i++) |
---|
179 | { |
---|
180 | struct dent * dp; |
---|
181 | |
---|
182 | dp = &hashtbl[i]; |
---|
183 | if ((dp->flagfield & USED) != 0) |
---|
184 | { |
---|
185 | for (j = 0; dp != NULL; j++, dp = dp->next) |
---|
186 | { |
---|
187 | if (j >= NSTAT) |
---|
188 | j = NSTAT - 1; |
---|
189 | stats[j]++; |
---|
190 | } |
---|
191 | } |
---|
192 | } |
---|
193 | for (i = 0, j = 0, avg = 0; i < NSTAT; i++) |
---|
194 | { |
---|
195 | j += stats[i]; |
---|
196 | avg += stats[i] * (i + 1); |
---|
197 | if (j == 0) |
---|
198 | (void) fprintf (statf, "%d:\t%d\t0\t0.0\n", i + 1, stats[i]); |
---|
199 | else |
---|
200 | (void) fprintf (statf, "%d:\t%d\t%d\t%f\n", i + 1, stats[i], j, |
---|
201 | (double) avg / j); |
---|
202 | } |
---|
203 | (void) fclose (statf); |
---|
204 | |
---|
205 | filltable (); |
---|
206 | output (); |
---|
207 | return 0; |
---|
208 | } |
---|
209 | |
---|
210 | static void output () |
---|
211 | { |
---|
212 | register FILE * houtfile; |
---|
213 | register struct dent * dp; |
---|
214 | int strptr; |
---|
215 | int n; |
---|
216 | int i; |
---|
217 | int maxplen; |
---|
218 | int maxslen; |
---|
219 | struct flagent * fentry; |
---|
220 | |
---|
221 | if ((houtfile = fopen (Hfile, "wb")) == NULL) |
---|
222 | { |
---|
223 | (void) fprintf (stderr, CANT_CREATE, Hfile); |
---|
224 | return; |
---|
225 | } |
---|
226 | hashheader.stringsize = 0; |
---|
227 | hashheader.lstringsize = 0; |
---|
228 | hashheader.tblsize = hashsize; |
---|
229 | (void) fwrite ((char *) &hashheader, sizeof hashheader, 1, houtfile); |
---|
230 | strptr = 0; |
---|
231 | /* |
---|
232 | ** Put out the strings from the flags table. This code assumes that |
---|
233 | ** the size of the hash header is a multiple of the size of ichar_t, |
---|
234 | ** and that any integer can be converted to an (ichar_t *) and back |
---|
235 | ** without damage. |
---|
236 | */ |
---|
237 | maxslen = 0; |
---|
238 | for (i = numsflags, fentry = sflaglist; --i >= 0; fentry++) |
---|
239 | { |
---|
240 | if (fentry->stripl) |
---|
241 | { |
---|
242 | (void) fwrite ((char *) fentry->strip, fentry->stripl + 1, |
---|
243 | sizeof (ichar_t), houtfile); |
---|
244 | fentry->strip = (ichar_t *) strptr; |
---|
245 | strptr += (fentry->stripl + 1) * sizeof (ichar_t); |
---|
246 | } |
---|
247 | if (fentry->affl) |
---|
248 | { |
---|
249 | (void) fwrite ((char *) fentry->affix, fentry->affl + 1, |
---|
250 | sizeof (ichar_t), houtfile); |
---|
251 | fentry->affix = (ichar_t *) strptr; |
---|
252 | strptr += (fentry->affl + 1) * sizeof (ichar_t); |
---|
253 | } |
---|
254 | n = fentry->affl - fentry->stripl; |
---|
255 | if (n < 0) |
---|
256 | n = -n; |
---|
257 | if (n > maxslen) |
---|
258 | maxslen = n; |
---|
259 | } |
---|
260 | maxplen = 0; |
---|
261 | for (i = numpflags, fentry = pflaglist; --i >= 0; fentry++) |
---|
262 | { |
---|
263 | if (fentry->stripl) |
---|
264 | { |
---|
265 | (void) fwrite ((char *) fentry->strip, fentry->stripl + 1, |
---|
266 | sizeof (ichar_t), houtfile); |
---|
267 | fentry->strip = (ichar_t *) strptr; |
---|
268 | strptr += (fentry->stripl + 1) * sizeof (ichar_t); |
---|
269 | } |
---|
270 | if (fentry->affl) |
---|
271 | { |
---|
272 | (void) fwrite ((char *) fentry->affix, fentry->affl + 1, |
---|
273 | sizeof (ichar_t), houtfile); |
---|
274 | fentry->affix = (ichar_t *) strptr; |
---|
275 | strptr += (fentry->affl + 1) * sizeof (ichar_t); |
---|
276 | } |
---|
277 | n = fentry->affl - fentry->stripl; |
---|
278 | if (n < 0) |
---|
279 | n = -n; |
---|
280 | if (n > maxplen) |
---|
281 | maxplen = n; |
---|
282 | } |
---|
283 | /* |
---|
284 | ** Write out the string character type tables. |
---|
285 | */ |
---|
286 | hashheader.strtypestart = strptr; |
---|
287 | for (i = 0; i < hashheader.nstrchartype; i++) |
---|
288 | { |
---|
289 | n = strlen (chartypes[i].name) + 1; |
---|
290 | (void) fwrite (chartypes[i].name, n, 1, houtfile); |
---|
291 | strptr += n; |
---|
292 | n = strlen (chartypes[i].deformatter) + 1; |
---|
293 | (void) fwrite (chartypes[i].deformatter, n, 1, houtfile); |
---|
294 | strptr += n; |
---|
295 | for (n = 0; |
---|
296 | chartypes[i].suffixes[n] != '\0'; |
---|
297 | n += strlen (&chartypes[i].suffixes[n]) + 1) |
---|
298 | ; |
---|
299 | n++; |
---|
300 | (void) fwrite (chartypes[i].suffixes, n, 1, houtfile); |
---|
301 | strptr += n; |
---|
302 | } |
---|
303 | hashheader.lstringsize = strptr; |
---|
304 | /* We allow one extra byte because missingletter() may add one byte */ |
---|
305 | maxslen += maxplen + 1; |
---|
306 | if (maxslen > MAXAFFIXLEN) |
---|
307 | { |
---|
308 | (void) fprintf (stderr, |
---|
309 | BHASH_C_BAFF_1 (MAXAFFIXLEN, maxslen - MAXAFFIXLEN)); |
---|
310 | (void) fprintf (stderr, BHASH_C_BAFF_2); |
---|
311 | } |
---|
312 | /* Put out the dictionary strings */ |
---|
313 | for (i = 0, dp = hashtbl; i < hashsize; i++, dp++) |
---|
314 | { |
---|
315 | if (dp->word == NULL) |
---|
316 | dp->word = (char *) -1; |
---|
317 | else |
---|
318 | { |
---|
319 | n = strlen (dp->word) + 1; |
---|
320 | (void) fwrite (dp->word, n, 1, houtfile); |
---|
321 | dp->word = (char *) strptr; |
---|
322 | strptr += n; |
---|
323 | } |
---|
324 | } |
---|
325 | /* Pad file to a struct dent boundary for efficiency. */ |
---|
326 | n = (strptr + sizeof hashheader) % sizeof (struct dent); |
---|
327 | if (n != 0) |
---|
328 | { |
---|
329 | n = sizeof (struct dent) - n; |
---|
330 | strptr += n; |
---|
331 | while (--n >= 0) |
---|
332 | (void) putc ('\0', houtfile); |
---|
333 | } |
---|
334 | /* Put out the hash table itself */ |
---|
335 | for (i = 0, dp = hashtbl; i < hashsize; i++, dp++) |
---|
336 | { |
---|
337 | if (dp->next != 0) |
---|
338 | { |
---|
339 | int x; |
---|
340 | x = dp->next - hashtbl; |
---|
341 | dp->next = (struct dent *)x; |
---|
342 | } |
---|
343 | else |
---|
344 | { |
---|
345 | dp->next = (struct dent *)-1; |
---|
346 | } |
---|
347 | #ifdef PIECEMEAL_HASH_WRITES |
---|
348 | (void) fwrite ((char *) dp, sizeof (struct dent), 1, houtfile); |
---|
349 | #endif /* PIECEMEAL_HASH_WRITES */ |
---|
350 | } |
---|
351 | #ifndef PIECEMEAL_HASH_WRITES |
---|
352 | (void) fwrite ((char *) hashtbl, sizeof (struct dent), hashsize, houtfile); |
---|
353 | #endif /* PIECEMEAL_HASH_WRITES */ |
---|
354 | /* Put out the language tables */ |
---|
355 | (void) fwrite ((char *) sflaglist, |
---|
356 | sizeof (struct flagent), numsflags, houtfile); |
---|
357 | hashheader.stblsize = numsflags; |
---|
358 | (void) fwrite ((char *) pflaglist, |
---|
359 | sizeof (struct flagent), numpflags, houtfile); |
---|
360 | hashheader.ptblsize = numpflags; |
---|
361 | /* Finish filling in the hash header. */ |
---|
362 | hashheader.stringsize = strptr; |
---|
363 | rewind (houtfile); |
---|
364 | (void) fwrite ((char *) &hashheader, sizeof hashheader, 1, houtfile); |
---|
365 | (void) fclose (houtfile); |
---|
366 | } |
---|
367 | |
---|
368 | static void filltable () |
---|
369 | { |
---|
370 | struct dent *freepointer, *nextword, *dp; |
---|
371 | struct dent *hashend; |
---|
372 | int i; |
---|
373 | int overflows; |
---|
374 | |
---|
375 | hashend = hashtbl + hashsize; |
---|
376 | for (freepointer = hashtbl; |
---|
377 | (freepointer->flagfield & USED) && freepointer < hashend; |
---|
378 | freepointer++) |
---|
379 | ; |
---|
380 | overflows = 0; |
---|
381 | for (nextword = hashtbl, i = hashsize; i != 0; nextword++, i--) |
---|
382 | { |
---|
383 | if ((nextword->flagfield & USED) == 0) |
---|
384 | continue; |
---|
385 | if (nextword->next >= hashtbl && nextword->next < hashend) |
---|
386 | continue; |
---|
387 | dp = nextword; |
---|
388 | while (dp->next) |
---|
389 | { |
---|
390 | if (freepointer >= hashend) |
---|
391 | { |
---|
392 | overflows++; |
---|
393 | break; |
---|
394 | } |
---|
395 | else |
---|
396 | { |
---|
397 | *freepointer = *(dp->next); |
---|
398 | dp->next = freepointer; |
---|
399 | dp = freepointer; |
---|
400 | |
---|
401 | while ((freepointer->flagfield & USED) |
---|
402 | && freepointer < hashend) |
---|
403 | freepointer++; |
---|
404 | } |
---|
405 | } |
---|
406 | } |
---|
407 | if (overflows) |
---|
408 | (void) fprintf (stderr, BHASH_C_OVERFLOW, overflows); |
---|
409 | } |
---|
410 | |
---|
411 | #if MALLOC_INCREMENT == 0 |
---|
412 | VOID * mymalloc (size) |
---|
413 | unsigned int size; |
---|
414 | { |
---|
415 | |
---|
416 | return malloc (size); |
---|
417 | } |
---|
418 | |
---|
419 | /* ARGSUSED */ |
---|
420 | VOID * myrealloc (ptr, size, oldsize) |
---|
421 | VOID * ptr; |
---|
422 | unsigned int size; |
---|
423 | unsigned int oldsize; |
---|
424 | { |
---|
425 | |
---|
426 | return realloc (ptr, size); |
---|
427 | } |
---|
428 | |
---|
429 | void myfree (ptr) |
---|
430 | VOID * ptr; |
---|
431 | { |
---|
432 | |
---|
433 | free (ptr); |
---|
434 | } |
---|
435 | |
---|
436 | #else |
---|
437 | |
---|
438 | VOID * mymalloc (size) /* Fast, unfree-able variant of malloc */ |
---|
439 | unsigned int size; |
---|
440 | { |
---|
441 | VOID * retval; |
---|
442 | static int bytesleft = 0; |
---|
443 | static VOID * nextspace; |
---|
444 | |
---|
445 | if (size < 4) |
---|
446 | size = 4; |
---|
447 | size = (size + 7) & ~7; /* Assume doubleword boundaries are enough */ |
---|
448 | if (bytesleft < size) |
---|
449 | { |
---|
450 | bytesleft = (size < MALLOC_INCREMENT) ? MALLOC_INCREMENT : size; |
---|
451 | nextspace = malloc ((unsigned) bytesleft); |
---|
452 | if (nextspace == NULL) |
---|
453 | { |
---|
454 | bytesleft = 0; |
---|
455 | return NULL; |
---|
456 | } |
---|
457 | } |
---|
458 | retval = nextspace; |
---|
459 | nextspace = (VOID *) ((char *) nextspace + size); |
---|
460 | bytesleft -= size; |
---|
461 | return retval; |
---|
462 | } |
---|
463 | |
---|
464 | VOID * myrealloc (ptr, size, oldsize) |
---|
465 | VOID * ptr; |
---|
466 | unsigned int size; |
---|
467 | unsigned int oldsize; |
---|
468 | { |
---|
469 | VOID *nptr; |
---|
470 | |
---|
471 | nptr = mymalloc (size); |
---|
472 | if (nptr == NULL) |
---|
473 | return NULL; |
---|
474 | (void) bcopy (ptr, nptr, oldsize); |
---|
475 | return nptr; |
---|
476 | } |
---|
477 | |
---|
478 | /* ARGSUSED */ |
---|
479 | void myfree (ptr) |
---|
480 | VOID * ptr; |
---|
481 | { |
---|
482 | } |
---|
483 | #endif |
---|
484 | |
---|
485 | static void readdict () |
---|
486 | { |
---|
487 | struct dent d; |
---|
488 | register struct dent * dp; |
---|
489 | struct dent * lastdp; |
---|
490 | char lbuf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS]; |
---|
491 | char ucbuf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS]; |
---|
492 | FILE * dictf; |
---|
493 | int i; |
---|
494 | int h; |
---|
495 | |
---|
496 | if ((dictf = fopen (Dfile, "r")) == NULL) |
---|
497 | { |
---|
498 | (void) fprintf (stderr, BHASH_C_CANT_OPEN_DICT); |
---|
499 | exit (1); |
---|
500 | } |
---|
501 | |
---|
502 | hashtbl = |
---|
503 | (struct dent *) calloc ((unsigned) hashsize, sizeof (struct dent)); |
---|
504 | if (hashtbl == NULL) |
---|
505 | { |
---|
506 | (void) fprintf (stderr, BHASH_C_NO_SPACE); |
---|
507 | exit (1); |
---|
508 | } |
---|
509 | |
---|
510 | i = 0; |
---|
511 | while (fgets (lbuf, sizeof lbuf, dictf) != NULL) |
---|
512 | { |
---|
513 | if (!silent && (i % 1000) == 0) |
---|
514 | { |
---|
515 | (void) fprintf (stderr, "%d ", i); |
---|
516 | (void) fflush (stdout); |
---|
517 | } |
---|
518 | i++; |
---|
519 | |
---|
520 | if (makedent (lbuf, sizeof lbuf, &d) < 0) |
---|
521 | continue; |
---|
522 | |
---|
523 | h = hash (strtosichar (d.word, 1), hashsize); |
---|
524 | |
---|
525 | dp = &hashtbl[h]; |
---|
526 | if ((dp->flagfield & USED) == 0) |
---|
527 | { |
---|
528 | *dp = d; |
---|
529 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
530 | /* |
---|
531 | ** If it's a followcase word, we need to make this a |
---|
532 | ** special dummy entry, and add a second with the |
---|
533 | ** correct capitalization. |
---|
534 | */ |
---|
535 | if (captype (d.flagfield) == FOLLOWCASE) |
---|
536 | { |
---|
537 | if (addvheader (dp)) |
---|
538 | exit (1); |
---|
539 | } |
---|
540 | #endif |
---|
541 | } |
---|
542 | else |
---|
543 | { |
---|
544 | |
---|
545 | /* |
---|
546 | ** Collision. Skip to the end of the collision |
---|
547 | ** chain, or to a pre-existing entry for this |
---|
548 | ** word. Note that d.word always exists at |
---|
549 | ** this point. |
---|
550 | */ |
---|
551 | (void) strcpy (ucbuf, d.word); |
---|
552 | chupcase (ucbuf); |
---|
553 | while (dp != NULL) |
---|
554 | { |
---|
555 | if (strcmp (dp->word, ucbuf) == 0) |
---|
556 | break; |
---|
557 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
558 | while (dp->flagfield & MOREVARIANTS) |
---|
559 | dp = dp->next; |
---|
560 | #endif /* NO_CAPITALIZATION_SUPPORT */ |
---|
561 | dp = dp->next; |
---|
562 | } |
---|
563 | if (dp != NULL) |
---|
564 | { |
---|
565 | /* |
---|
566 | ** A different capitalization is already in |
---|
567 | ** the dictionary. Combine capitalizations. |
---|
568 | */ |
---|
569 | if (combinecaps (dp, &d) < 0) |
---|
570 | exit (1); |
---|
571 | } |
---|
572 | else |
---|
573 | { |
---|
574 | /* Insert a new word into the dictionary */ |
---|
575 | for (dp = &hashtbl[h]; dp->next != NULL; ) |
---|
576 | dp = dp->next; |
---|
577 | lastdp = dp; |
---|
578 | dp = (struct dent *) mymalloc (sizeof (struct dent)); |
---|
579 | if (dp == NULL) |
---|
580 | { |
---|
581 | (void) fprintf (stderr, BHASH_C_COLLISION_SPACE); |
---|
582 | exit (1); |
---|
583 | } |
---|
584 | *dp = d; |
---|
585 | lastdp->next = dp; |
---|
586 | dp->next = NULL; |
---|
587 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
588 | /* |
---|
589 | ** If it's a followcase word, we need to make this a |
---|
590 | ** special dummy entry, and add a second with the |
---|
591 | ** correct capitalization. |
---|
592 | */ |
---|
593 | if (captype (d.flagfield) == FOLLOWCASE) |
---|
594 | { |
---|
595 | if (addvheader (dp)) |
---|
596 | exit (1); |
---|
597 | } |
---|
598 | #endif |
---|
599 | } |
---|
600 | } |
---|
601 | } |
---|
602 | if (!silent) |
---|
603 | (void) fprintf (stderr, "\n"); |
---|
604 | (void) fclose (dictf); |
---|
605 | } |
---|
606 | |
---|
607 | static void newcount () |
---|
608 | { |
---|
609 | char buf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS]; |
---|
610 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
611 | ichar_t ibuf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS]; |
---|
612 | #endif |
---|
613 | register FILE * d; |
---|
614 | register int i; |
---|
615 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
616 | ichar_t lastibuf[sizeof ibuf / sizeof (ichar_t)]; |
---|
617 | int headercounted; |
---|
618 | int followcase; |
---|
619 | register char * cp; |
---|
620 | #endif |
---|
621 | |
---|
622 | if (!silent) |
---|
623 | (void) fprintf (stderr, BHASH_C_COUNTING); |
---|
624 | |
---|
625 | if ((d = fopen (Dfile, "r")) == NULL) |
---|
626 | { |
---|
627 | (void) fprintf (stderr, BHASH_C_CANT_OPEN_DICT); |
---|
628 | exit (1); |
---|
629 | } |
---|
630 | |
---|
631 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
632 | headercounted = 0; |
---|
633 | lastibuf[0] = 0; |
---|
634 | #endif |
---|
635 | for (i = 0; fgets (buf, sizeof buf, d); ) |
---|
636 | { |
---|
637 | if ((++i % 1000) == 0 && !silent) |
---|
638 | { |
---|
639 | (void) fprintf (stderr, "%d ", i); |
---|
640 | (void) fflush (stdout); |
---|
641 | } |
---|
642 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
643 | cp = index (buf, hashheader.flagmarker); |
---|
644 | if (cp != NULL) |
---|
645 | *cp = '\0'; |
---|
646 | if (strtoichar (ibuf, buf, INPUTWORDLEN * sizeof (ichar_t), 1)) |
---|
647 | (void) fprintf (stderr, WORD_TOO_LONG (buf)); |
---|
648 | followcase = (whatcap (ibuf) == FOLLOWCASE); |
---|
649 | upcase (ibuf); |
---|
650 | if (icharcmp (ibuf, lastibuf) != 0) |
---|
651 | headercounted = 0; |
---|
652 | else if (!headercounted) |
---|
653 | { |
---|
654 | /* First duplicate will take two entries */ |
---|
655 | if ((++i % 1000) == 0 && !silent) |
---|
656 | { |
---|
657 | (void) fprintf (stderr, "%d ", i); |
---|
658 | (void) fflush (stdout); |
---|
659 | } |
---|
660 | headercounted = 1; |
---|
661 | } |
---|
662 | if (!headercounted && followcase) |
---|
663 | { |
---|
664 | /* It's followcase and the first entry -- count again */ |
---|
665 | if ((++i % 1000) == 0 && !silent) |
---|
666 | { |
---|
667 | (void) fprintf (stderr, "%d ", i); |
---|
668 | (void) fflush (stdout); |
---|
669 | } |
---|
670 | headercounted = 1; |
---|
671 | } |
---|
672 | (void) icharcpy (lastibuf, ibuf); |
---|
673 | #endif |
---|
674 | } |
---|
675 | (void) fclose (d); |
---|
676 | if (!silent) |
---|
677 | (void) fprintf (stderr, BHASH_C_WORD_COUNT, i); |
---|
678 | if ((d = fopen (Cfile, "w")) == NULL) |
---|
679 | { |
---|
680 | (void) fprintf (stderr, CANT_CREATE, Cfile); |
---|
681 | exit (1); |
---|
682 | } |
---|
683 | (void) fprintf (d, "%d\n", i); |
---|
684 | (void) fclose (d); |
---|
685 | } |
---|