1 | /* |
---|
2 | * $Id: ispell.h,v 1.1.1.1 1997-09-03 21:08:12 ghudson Exp $ |
---|
3 | */ |
---|
4 | |
---|
5 | /* |
---|
6 | * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA |
---|
7 | * All rights reserved. |
---|
8 | * |
---|
9 | * Redistribution and use in source and binary forms, with or without |
---|
10 | * modification, are permitted provided that the following conditions |
---|
11 | * are met: |
---|
12 | * |
---|
13 | * 1. Redistributions of source code must retain the above copyright |
---|
14 | * notice, this list of conditions and the following disclaimer. |
---|
15 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
16 | * notice, this list of conditions and the following disclaimer in the |
---|
17 | * documentation and/or other materials provided with the distribution. |
---|
18 | * 3. All modifications to the source code must be clearly marked as |
---|
19 | * such. Binary redistributions based on modified source code |
---|
20 | * must be clearly marked as modified versions in the documentation |
---|
21 | * and/or other materials provided with the distribution. |
---|
22 | * 4. All advertising materials mentioning features or use of this software |
---|
23 | * must display the following acknowledgment: |
---|
24 | * This product includes software developed by Geoff Kuenning and |
---|
25 | * other unpaid contributors. |
---|
26 | * 5. The name of Geoff Kuenning may not be used to endorse or promote |
---|
27 | * products derived from this software without specific prior |
---|
28 | * written permission. |
---|
29 | * |
---|
30 | * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND |
---|
31 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
32 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
33 | * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE |
---|
34 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
35 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
36 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
37 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
38 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
39 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
40 | * SUCH DAMAGE. |
---|
41 | */ |
---|
42 | |
---|
43 | /* |
---|
44 | * $Log: not supported by cvs2svn $ |
---|
45 | * Revision 1.68 1995/03/06 02:42:41 geoff |
---|
46 | * Be vastly more paranoid about parenthesizing macro arguments. This |
---|
47 | * fixes a bug in defmt.c where a complex argument was passed to |
---|
48 | * isstringch. |
---|
49 | * |
---|
50 | * Revision 1.67 1995/01/03 19:24:12 geoff |
---|
51 | * Get rid of a non-global declaration. |
---|
52 | * |
---|
53 | * Revision 1.66 1994/12/27 23:08:49 geoff |
---|
54 | * Fix a lot of subtly bad assumptions about the widths of ints and longs |
---|
55 | * which only show up on 64-bit machines like the Cray and the DEC Alpha. |
---|
56 | * |
---|
57 | * Revision 1.65 1994/11/02 06:56:10 geoff |
---|
58 | * Remove the anyword feature, which I've decided is a bad idea. |
---|
59 | * |
---|
60 | * Revision 1.64 1994/10/25 05:46:18 geoff |
---|
61 | * Add the FF_ANYWORD flag for defining an affix that will apply to any |
---|
62 | * word, even if not explicitly specified. (Good for French.) |
---|
63 | * |
---|
64 | * Revision 1.63 1994/09/16 04:48:28 geoff |
---|
65 | * Make stringdups and laststringch unsigned ints, and dupnos a plain |
---|
66 | * int, so that we can handle more than 128 stringchars and stringchar |
---|
67 | * types. |
---|
68 | * |
---|
69 | * Revision 1.62 1994/09/01 06:06:39 geoff |
---|
70 | * Change erasechar/killchar to uerasechar/ukillchar to avoid |
---|
71 | * shared-library problems on HP systems. |
---|
72 | * |
---|
73 | * Revision 1.61 1994/08/31 05:58:35 geoff |
---|
74 | * Add contextoffset, used in -a mode to handle extremely long lines. |
---|
75 | * |
---|
76 | * Revision 1.60 1994/05/17 06:44:15 geoff |
---|
77 | * Add support for controlled compound formation and the COMPOUNDONLY |
---|
78 | * option to affix flags. |
---|
79 | * |
---|
80 | * Revision 1.59 1994/03/15 06:25:16 geoff |
---|
81 | * Change deftflag's initialization so we can tell if -t/-n appeared. |
---|
82 | * |
---|
83 | * Revision 1.58 1994/02/07 05:53:28 geoff |
---|
84 | * Add typecasts to the 7-bit versions of ichar* routines |
---|
85 | * |
---|
86 | * Revision 1.57 1994/01/25 07:11:48 geoff |
---|
87 | * Get rid of all old RCS log lines in preparation for the 3.1 release. |
---|
88 | * |
---|
89 | */ |
---|
90 | |
---|
91 | #include <stdio.h> |
---|
92 | |
---|
/*
 * Prototype compatibility shims.  Under an ANSI compiler P(x) keeps the
 * parameter list and VOID is "void".  Under a pre-ANSI compiler P(x)
 * collapses to an empty parameter list, VOID becomes "char", and the
 * "const" keyword is defined away.
 */
#ifndef __STDC__
#define P(x)    ()
#define VOID    char
#define const
#else /* __STDC__ */
#define P(x)    x
#define VOID    void
#endif /* __STDC__ */
101 | |
---|
/* Size of the basic character set: 128 if NO8BIT is defined, else 256. */
#ifndef NO8BIT
#define SET_SIZE    256
#else /* NO8BIT */
#define SET_SIZE    128
#endif /* NO8BIT */
107 | |
---|
/* Number of MASKTYPE words in a flag mask. */
#define MASKSIZE    (MASKBITS / MASKTYPE_WIDTH)

/*
 * Set, clear, and test one bit of an affix-flag mask.
 *
 *   mask  array of MASKTYPE words (MASKSIZE of them)
 *   bit   zero-based bit number
 *
 * SETMASKBIT and CLRMASKBIT modify the mask in place.  TSTMASKBIT yields
 * a MASKTYPE value that is nonzero (not necessarily 1) when the bit is set.
 * In the multi-word versions "bit" is evaluated twice, so do not pass an
 * expression with side effects.
 */
#ifdef lint
extern void SETMASKBIT P ((MASKTYPE * mask, int bit));
extern void CLRMASKBIT P ((MASKTYPE * mask, int bit));
extern int  TSTMASKBIT P ((MASKTYPE * mask, int bit));
#else /* lint */
/* The following is really testing for MASKSIZE <= 1, but cpp can't do that */
#if MASKBITS <= MASKTYPE_WIDTH
#define SETMASKBIT(mask, bit) ((mask)[0] |= (MASKTYPE) 1 << (bit))
/*
 * The constant 1 must be widened to MASKTYPE *before* it is shifted.
 * Shifting a plain int is undefined for bit numbers at or above the width
 * of int, which happens whenever MASKTYPE is wider than int.
 */
#define CLRMASKBIT(mask, bit) ((mask)[0] &= ~((MASKTYPE) 1 << (bit)))
#define TSTMASKBIT(mask, bit) ((mask)[0] & ((MASKTYPE) 1 << (bit)))
#else
#define SETMASKBIT(mask, bit) \
        ((mask)[(bit) / MASKTYPE_WIDTH] |= \
          (MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1)))
#define CLRMASKBIT(mask, bit) \
        ((mask)[(bit) / MASKTYPE_WIDTH] &= \
          ~((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1))))
#define TSTMASKBIT(mask, bit) \
        ((mask)[(bit) / MASKTYPE_WIDTH] & \
          ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1))))
#endif
#endif /* lint */
132 | |
---|
/*
 * With more than 64 mask bits the dent status flags live in a separate
 * "flags" field (see flagfield) instead of sharing the last mask word.
 */
#if MASKBITS > 64
#define FULLMASKSET
#endif

/*
 * BITTOCHAR/CHARTOBIT convert between a flag's bit number and the
 * character that names it.  For masks of up to 64 bits, bit 0 is 'A';
 * for larger masks the bit number is the character itself.
 */
#ifdef lint
extern int  BITTOCHAR P ((int bit));
extern int  CHARTOBIT P ((int ch));
#else /* lint */
# if MASKBITS <= 64
#define BITTOCHAR(bit)  ((bit) + 'A')
#define CHARTOBIT(ch)   ((ch) - 'A')
# else
#define BITTOCHAR(bit)  (bit)
#define CHARTOBIT(ch)   (ch)
# endif
#endif /* lint */

/*
 * LARGESTFLAG bounds the usable affix-flag bit numbers.  FLAGBASE is the
 * bit position at which the dent status flags (USED, KEEP, ...) start
 * within flagfield.
 */
#if MASKBITS <= 32
#define LARGESTFLAG     26      /* 5 are needed for flagfield below */
#define FLAGBASE        ((MASKTYPE_WIDTH) - 6)
#else
# if MASKBITS <= 64
#define LARGESTFLAG     (64 - 6) /* 5 are needed for flagfield below */
#define FLAGBASE        ((MASKTYPE_WIDTH) - 6)
# else
#define LARGESTFLAG     MASKBITS /* flagfield is a separate field */
#define FLAGBASE        0
# endif
#endif
166 | |
---|
/*
** Internal character type used for word storage.  A plain unsigned char
** is used when the base character set plus all string characters fit in
** 256 codes; otherwise an unsigned short is used so that each string
** character can still be encoded as a single unit.
*/
#if (SET_SIZE + MAXSTRINGCHARS) <= 256
#ifndef lint
#define ICHAR_IS_CHAR
#endif /* lint */
#endif

#ifndef ICHAR_IS_CHAR
/* Wide internal characters: only the conversion macro is defined here. */
typedef unsigned short  ichar_t;        /* Internal character */
#define chartoichar(x)  ((ichar_t) (unsigned char) (x))
#else /* ICHAR_IS_CHAR */
/* ichar_t strings are ordinary C strings; map onto the str* routines. */
typedef unsigned char   ichar_t;        /* Internal character */
#define icharlen(s)         strlen ((char *) (s))
#define icharcpy(a, b)      strcpy ((char *) (a), (char *) (b))
#define icharcmp(a, b)      strcmp ((char *) (a), (char *) (b))
#define icharncmp(a, b, n)  strncmp ((char *) (a), (char *) (b), (n))
#define chartoichar(x)      ((ichar_t) (x))
#endif /* ICHAR_IS_CHAR */
188 | |
---|
189 | struct dent |
---|
190 | { |
---|
191 | struct dent * next; |
---|
192 | char * word; |
---|
193 | MASKTYPE mask[MASKSIZE]; |
---|
194 | #ifdef FULLMASKSET |
---|
195 | char flags; |
---|
196 | #endif |
---|
197 | }; |
---|
198 | |
---|
199 | /* |
---|
200 | ** Flags in the dictionary entry. If FULLMASKSET is undefined, these are |
---|
201 | ** stored in the highest bits of the last longword of the mask field. If |
---|
202 | ** FULLMASKSET is defined, they are stored in the extra "flags" field. |
---|
203 | #ifndef NO_CAPITALIZATION_SUPPORT |
---|
204 | ** |
---|
205 | ** If a word has only one capitalization form, and that form is not |
---|
206 | ** FOLLOWCASE, it will have exactly one entry in the dictionary. The |
---|
207 | ** legal capitalizations will be indicated by the 2-bit capitalization |
---|
208 | ** field, as follows: |
---|
209 | ** |
---|
210 | ** ALLCAPS The word must appear in all capitals. |
---|
211 | ** CAPITALIZED The word must be capitalized (e.g., London). |
---|
212 | ** It will also be accepted in all capitals. |
---|
213 | ** ANYCASE The word may appear in lowercase, capitalized, |
---|
214 | ** or all-capitals. |
---|
215 | ** |
---|
216 | ** Regardless of the capitalization flags, the "word" field of the entry |
---|
217 | ** will point to an all-uppercase copy of the word. This is to simplify |
---|
218 | ** the large portion of the code that doesn't care about capitalization. |
---|
219 | ** Ispell will generate the correct version when needed. |
---|
220 | ** |
---|
221 | ** If a word has more than one capitalization, there will be multiple |
---|
222 | ** entries for it, linked together by the "next" field. The initial |
---|
223 | ** entry for such words will be a dummy entry, primarily for use by code |
---|
224 | ** that ignores capitalization. The "word" field of this entry will |
---|
225 | ** again point to an all-uppercase copy of the word. The "mask" field |
---|
226 | ** will contain the logical OR of the mask fields of all variants. |
---|
227 | ** A header entry is indicated by a capitalization type of ALLCAPS, |
---|
228 | ** with the MOREVARIANTS bit set. |
---|
229 | ** |
---|
230 | ** The following entries will define the individual variants. Each |
---|
231 | ** entry except the last has the MOREVARIANTS flag set, and each |
---|
232 | ** contains one of the following capitalization options: |
---|
233 | ** |
---|
234 | ** ALLCAPS The word must appear in all capitals. |
---|
235 | ** CAPITALIZED The word must be capitalized (e.g., London). |
---|
236 | ** It will also be accepted in all capitals. |
---|
237 | ** FOLLOWCASE The word must be capitalized exactly like the |
---|
238 | ** sample in the entry. Prefix (suffix) characters |
---|
239 | ** must be rendered in the case of the first (last) |
---|
240 | ** "alphabetic" character. It will also be accepted |
---|
241 | ** in all capitals. ("Alphabetic" means "mentioned |
---|
242 | ** in a 'casechars' statement".) |
---|
243 | ** ANYCASE The word may appear in lowercase, capitalized, |
---|
244 | ** or all-capitals. |
---|
245 | ** |
---|
246 | ** The "mask" field for the entry contains only the affix flag bits that |
---|
247 | ** are legal for that capitalization. The "word" field will be null |
---|
248 | ** except for FOLLOWCASE entries, where it will point to the |
---|
249 | ** correctly-capitalized spelling of the root word. |
---|
250 | ** |
---|
251 | ** It is worth discussing why the ALLCAPS option is used in |
---|
252 | ** the header entry. The header entry accepts an all-capitals |
---|
253 | ** version of the root plus every affix (this is always legal, since |
---|
254 | ** words get capitalized in headers and so forth). Further, all of |
---|
255 | ** the following variant entries will reject any all-capitals form |
---|
256 | ** that is illegal due to an affix. |
---|
257 | ** |
---|
258 | ** Finally, note that variations in the KEEP flag can cause a multiple-variant |
---|
259 | ** entry as well. For example, if the personal dictionary contains "ALPHA", |
---|
260 | ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a |
---|
261 | ** multiple-variant entry will be created so that "alpha" will be accepted |
---|
262 | ** but only "ALPHA" will actually be kept. |
---|
263 | #endif |
---|
264 | */ |
---|
/*
 * Where the dent status flags are kept: the last word of the mask, or
 * the separate "flags" member when FULLMASKSET is defined.
 */
#ifndef FULLMASKSET
#define flagfield       mask[MASKSIZE - 1]
#else
#define flagfield       flags
#endif

/* Status bits, counted upward from FLAGBASE. */
#define USED            ((MASKTYPE) 1 << (FLAGBASE + 0))
#define KEEP            ((MASKTYPE) 1 << (FLAGBASE + 1))

#ifndef NO_CAPITALIZATION_SUPPORT
/* Two-bit capitalization type, followed by the "more variants" bit. */
#define ANYCASE         ((MASKTYPE) 0 << (FLAGBASE + 2))
#define ALLCAPS         ((MASKTYPE) 1 << (FLAGBASE + 2))
#define CAPITALIZED     ((MASKTYPE) 2 << (FLAGBASE + 2))
#define FOLLOWCASE      ((MASKTYPE) 3 << (FLAGBASE + 2))
#define CAPTYPEMASK     ((MASKTYPE) 3 << (FLAGBASE + 2))
#define MOREVARIANTS    ((MASKTYPE) 1 << (FLAGBASE + 4))
#define ALLFLAGS        (USED | KEEP | CAPTYPEMASK | MOREVARIANTS)
/* Extract the capitalization type from a flag word. */
#define captype(x)      ((x) & CAPTYPEMASK)
#else /* NO_CAPITALIZATION_SUPPORT */
#define ALLFLAGS        (USED | KEEP)
#endif /* NO_CAPITALIZATION_SUPPORT */
284 | |
---|
285 | /* |
---|
286 | * Language tables used to encode prefix and suffix information. |
---|
287 | */ |
---|
288 | struct flagent |
---|
289 | { |
---|
290 | ichar_t * strip; /* String to strip off */ |
---|
291 | ichar_t * affix; /* Affix to append */ |
---|
292 | short flagbit; /* Flag bit this ent matches */ |
---|
293 | short stripl; /* Length of strip */ |
---|
294 | short affl; /* Length of affix */ |
---|
295 | short numconds; /* Number of char conditions */ |
---|
296 | short flagflags; /* Modifiers on this flag */ |
---|
297 | char conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds */ |
---|
298 | }; |
---|
299 | |
---|
300 | /* |
---|
301 | * Bits in flagflags |
---|
302 | */ |
---|
303 | #define FF_CROSSPRODUCT (1 << 0) /* Affix does cross-products */ |
---|
304 | #define FF_COMPOUNDONLY (1 << 1) /* Afx works in compounds */ |
---|
305 | |
---|
/* Either a further level of indexing or the head of a list of entries. */
union ptr_union                     /* Aid for building flg ptrs */
    {
    struct flagptr *fp;             /* Pointer to more indexing */
    struct flagent *ent;            /* First of a list of ents */
    };

/* One slot of an affix index; numents tells which union member applies. */
struct flagptr
    {
    union ptr_union pu;             /* Ent list or more indexes */
    int numents;                    /* If zero, pu.fp is valid */
    };
317 | |
---|
/*
 * One string-character type: its name, the deformatter to use with it,
 * and its file suffixes (null-separated).
 */
struct strchartype
    {
    char *name;                     /* Name of the type */
    char *deformatter;              /* Deformatter to use */
    char *suffixes;                 /* File suffixes, null seps */
    };
327 | |
---|
328 | /* |
---|
329 | * Header placed at the beginning of the hash file. |
---|
330 | */ |
---|
331 | struct hashheader |
---|
332 | { |
---|
333 | unsigned short magic; /* Magic number for ID */ |
---|
334 | unsigned short compileoptions; /* How we were compiled */ |
---|
335 | short maxstringchars; /* Max # strchrs we support */ |
---|
336 | short maxstringcharlen; /* Max strchr len supported */ |
---|
337 | short compoundmin; /* Min lth of compound parts */ |
---|
338 | short compoundbit; /* Flag 4 compounding roots */ |
---|
339 | int stringsize; /* Size of string table */ |
---|
340 | int lstringsize; /* Size of lang. str tbl */ |
---|
341 | int tblsize; /* No. entries in hash tbl */ |
---|
342 | int stblsize; /* No. entries in sfx tbl */ |
---|
343 | int ptblsize; /* No. entries in pfx tbl */ |
---|
344 | int sortval; /* Largest sort ID assigned */ |
---|
345 | int nstrchars; /* No. strchars defined */ |
---|
346 | int nstrchartype; /* No. strchar types */ |
---|
347 | int strtypestart; /* Start of strtype table */ |
---|
348 | char nrchars[5]; /* Nroff special characters */ |
---|
349 | char texchars[13]; /* TeX special characters */ |
---|
350 | char compoundflag; /* Compund-word handling */ |
---|
351 | char defhardflag; /* Default tryveryhard flag */ |
---|
352 | char flagmarker; /* "Start-of-flags" char */ |
---|
353 | unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */ |
---|
354 | ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */ |
---|
355 | ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */ |
---|
356 | char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */ |
---|
357 | char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */ |
---|
358 | char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */ |
---|
359 | char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */ |
---|
360 | char stringstarts[SET_SIZE]; /* NZ if char can start str */ |
---|
361 | char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */ |
---|
362 | unsigned int stringdups[MAXSTRINGCHARS]; /* No. of "base" char */ |
---|
363 | int dupnos[MAXSTRINGCHARS]; /* Dup char ID # */ |
---|
364 | unsigned short magic2; /* Second magic for dbl chk */ |
---|
365 | }; |
---|
366 | |
---|
/* hash table magic number */
#define MAGIC                   0x9602

/*
 * Compile options, recorded in the hash header for consistency checking.
 * Each option contributes its own bit field; COMPILEOPTIONS is their OR.
 */

/* Bit 0: set when built without 8-bit support. */
#ifndef NO8BIT
# define MAGIC8BIT              0x00
#else
# define MAGIC8BIT              0x01
#endif

/* Bit 1: set when capitalization support is compiled in. */
#ifndef NO_CAPITALIZATION_SUPPORT
# define MAGICCAPITALIZATION    0x02
#else
# define MAGICCAPITALIZATION    0x00
#endif

/* Bits 2-3: mask-size class (<=32, <=64, <=128, larger). */
#if MASKBITS > 128
# define MAGICMASKSET           0x0C
#else
# if MASKBITS > 64
#  define MAGICMASKSET          0x08
# else
#  if MASKBITS > 32
#   define MAGICMASKSET         0x04
#  else
#   define MAGICMASKSET         0x00
#  endif
# endif
#endif

#define COMPILEOPTIONS  (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET)
396 | |
---|
/*
 * Record of one successful lookup.  The ins_root_cap routine uses these
 * values to produce correct capitalizations.
 */
struct success
    {
    struct dent *dictent;           /* Header of dict entry chain for wd */
    struct flagent *prefix;         /* Prefix flag used, or NULL */
    struct flagent *suffix;         /* Suffix flag used, or NULL */
    };
407 | |
---|
/*
** Named accessors for the nroff special characters held in
** hashheader.nrchars
*/
#define NRLEFTPAREN     hashheader.nrchars[0]
#define NRRIGHTPAREN    hashheader.nrchars[1]
#define NRDOT           hashheader.nrchars[2]
#define NRBACKSLASH     hashheader.nrchars[3]
#define NRSTAR          hashheader.nrchars[4]

/*
** Named accessors for the TeX special characters held in
** hashheader.texchars
*/
#define TEXLEFTPAREN    hashheader.texchars[0]
#define TEXRIGHTPAREN   hashheader.texchars[1]
#define TEXLEFTSQUARE   hashheader.texchars[2]
#define TEXRIGHTSQUARE  hashheader.texchars[3]
#define TEXLEFTCURLY    hashheader.texchars[4]
#define TEXRIGHTCURLY   hashheader.texchars[5]
#define TEXLEFTANGLE    hashheader.texchars[6]
#define TEXRIGHTANGLE   hashheader.texchars[7]
#define TEXBACKSLASH    hashheader.texchars[8]
#define TEXDOLLAR       hashheader.texchars[9]
#define TEXSTAR         hashheader.texchars[10]
#define TEXDOT          hashheader.texchars[11]
#define TEXPERCENT      hashheader.texchars[12]
433 | |
---|
/*
** Legal settings of compoundflag
*/
#define COMPOUND_NEVER          0   /* Compound words are never good */
#define COMPOUND_ANYTIME        1   /* Accept run-together words */
#define COMPOUND_CONTROLLED     2   /* Compounds controlled by afx flags */
440 | |
---|
/*
** Private replacements for the isXXXX/toXXXX macros.  The standard ones
** normally check only the ASCII range; these consult the tables in
** hashheader, so they follow whatever character sets the affix files
** have defined.  myspace is the exception: it accepts only codes strictly
** between 0 and 0x80 and then defers to isspace.
**
** Each macro indexes a table with its argument, and myspace evaluates its
** argument up to three times, so avoid arguments with side effects.
*/
#ifdef lint
extern int      myupper P ((unsigned int ch));
extern int      mylower P ((unsigned int ch));
extern int      myspace P ((unsigned int ch));
extern int      iswordch P ((unsigned int ch));
extern int      isboundarych P ((unsigned int ch));
extern int      isstringstart P ((unsigned int ch));
extern ichar_t  mytolower P ((unsigned int ch));
extern ichar_t  mytoupper P ((unsigned int ch));
#else /* lint */
#define myupper(X)          (hashheader.upperchars[(X)])
#define mylower(X)          (hashheader.lowerchars[(X)])
#define myspace(X)          (((X) > 0) && ((X) < 0x80) \
                              && isspace((unsigned char) (X)))
#define iswordch(X)         (hashheader.wordchars[(X)])
#define isboundarych(X)     (hashheader.boundarychars[(X)])
#define isstringstart(X)    (hashheader.stringstarts[(unsigned char) (X)])
#define mytolower(X)        (hashheader.lowerconv[(X)])
#define mytoupper(X)        (hashheader.upperconv[(X)])
#endif /* lint */
466 | |
---|
/*
** String-character tests.  Unlike the single-character macros above,
** these take a POINTER to the text, not a character, because a string
** character may be several characters long.  All three first check
** isstringstart on the pointed-to character and only then call
** stringcharlen; the result is true when that length is positive.
**
**   isstringch     plain test.
**   l_isstringch   also stores the stringcharlen result in "len" as a
**                  side effect; "len" is not touched when isstringstart
**                  fails.
**   l1_isstringch  like l_isstringch, but guarantees "len" is valid for
**                  *all* characters: it is forced to 1 whenever the macro
**                  yields false.
**
** (Note that the global "laststringch" is also set, and sometimes used,
** by these macros.)
*/
#define isstringch(ptr, canon) \
        (isstringstart (*(ptr)) \
          && stringcharlen ((ptr), (canon)) > 0)
#define l_isstringch(ptr, len, canon) \
        (isstringstart (*(ptr)) \
          && (len = stringcharlen ((ptr), (canon))) > 0)
#define l1_isstringch(ptr, len, canon) \
        (len = 1, \
          isstringstart (*(ptr)) \
            && ((len = stringcharlen ((ptr), (canon))) > 0 \
                ? 1 : (len = 1, 0)))
494 | |
---|
/*
 * Sizes, in bytes, of the buffers returned by ichartosstr/strtosichar.
 * Both hold the same number of characters; the second is scaled by the
 * size of an ichar_t.
 */
#define ICHARTOSSTR_SIZE    (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4)
#define STRTOSICHAR_SIZE    ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \
                              * sizeof (ichar_t))
501 | |
---|
502 | /* |
---|
503 | * termcap variables |
---|
504 | */ |
---|
505 | #ifdef MAIN |
---|
506 | # define EXTERN /* nothing */ |
---|
507 | #else |
---|
508 | # define EXTERN extern |
---|
509 | #endif |
---|
510 | |
---|
511 | EXTERN char * BC; /* backspace if not ^H */ |
---|
512 | EXTERN char * cd; /* clear to end of display */ |
---|
513 | EXTERN char * cl; /* clear display */ |
---|
514 | EXTERN char * cm; /* cursor movement */ |
---|
515 | EXTERN char * ho; /* home */ |
---|
516 | EXTERN char * nd; /* non-destructive space */ |
---|
517 | EXTERN char * so; /* standout */ |
---|
518 | EXTERN char * se; /* standout end */ |
---|
519 | EXTERN int sg; /* space taken by so/se */ |
---|
520 | EXTERN char * ti; /* terminal initialization sequence */ |
---|
521 | EXTERN char * te; /* terminal termination sequence */ |
---|
522 | EXTERN int li; /* lines */ |
---|
523 | EXTERN int co; /* columns */ |
---|
524 | |
---|
525 | EXTERN int contextsize; /* number of lines of context to show */ |
---|
526 | EXTERN char contextbufs[MAXCONTEXT][BUFSIZ]; /* Context of current line */ |
---|
527 | EXTERN int contextoffset; /* Offset of line start in contextbufs[0] */ |
---|
528 | EXTERN char * currentchar; /* Location in contextbufs */ |
---|
529 | EXTERN char ctoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Current token as char */ |
---|
530 | EXTERN ichar_t itoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Ctoken as ichar_t str */ |
---|
531 | |
---|
532 | EXTERN char termcap[2048]; /* termcap entry */ |
---|
533 | EXTERN char termstr[2048]; /* for string values */ |
---|
534 | EXTERN char * termptr; /* pointer into termcap, used by tgetstr */ |
---|
535 | |
---|
536 | EXTERN int numhits; /* number of hits in dictionary lookups */ |
---|
537 | EXTERN struct success |
---|
538 | hits[MAX_HITS]; /* table of hits gotten in lookup */ |
---|
539 | |
---|
540 | EXTERN char * hashstrings; /* Strings in hash table */ |
---|
541 | EXTERN struct hashheader |
---|
542 | hashheader; /* Header of hash table */ |
---|
543 | EXTERN struct dent * |
---|
544 | hashtbl; /* Main hash table, for dictionary */ |
---|
545 | EXTERN int hashsize; /* Size of main hash table */ |
---|
546 | |
---|
547 | EXTERN char hashname[MAXPATHLEN]; /* Name of hash table file */ |
---|
548 | |
---|
549 | EXTERN int aflag; /* NZ if -a or -A option specified */ |
---|
550 | EXTERN int cflag; /* NZ if -c (crunch) option */ |
---|
551 | EXTERN int lflag; /* NZ if -l (list) option */ |
---|
552 | EXTERN int incfileflag; /* whether xgets() acts exactly like gets() */ |
---|
553 | EXTERN int nodictflag; /* NZ if dictionary not needed */ |
---|
554 | |
---|
555 | EXTERN int uerasechar; /* User's erase character, from stty */ |
---|
556 | EXTERN int ukillchar; /* User's kill character */ |
---|
557 | |
---|
558 | EXTERN unsigned int laststringch; /* Number of last string character */ |
---|
559 | EXTERN int defdupchar; /* Default duplicate string type */ |
---|
560 | |
---|
561 | EXTERN int numpflags; /* Number of prefix flags in table */ |
---|
562 | EXTERN int numsflags; /* Number of suffix flags in table */ |
---|
563 | EXTERN struct flagptr pflagindex[SET_SIZE + MAXSTRINGCHARS]; |
---|
564 | /* Fast index to pflaglist */ |
---|
565 | EXTERN struct flagent * pflaglist; /* Prefix flag control list */ |
---|
566 | EXTERN struct flagptr sflagindex[SET_SIZE + MAXSTRINGCHARS]; |
---|
567 | /* Fast index to sflaglist */ |
---|
568 | EXTERN struct flagent * sflaglist; /* Suffix flag control list */ |
---|
569 | |
---|
570 | EXTERN struct strchartype * /* String character type collection */ |
---|
571 | chartypes; |
---|
572 | |
---|
573 | EXTERN FILE * infile; /* File being corrected */ |
---|
574 | EXTERN FILE * outfile; /* Corrected copy of infile */ |
---|
575 | |
---|
576 | EXTERN char * askfilename; /* File specified in -f option */ |
---|
577 | |
---|
578 | EXTERN int changes; /* NZ if changes made to cur. file */ |
---|
579 | EXTERN int readonly; /* NZ if current file is readonly */ |
---|
580 | EXTERN int quit; /* NZ if we're done with this file */ |
---|
581 | |
---|
582 | #define MAXPOSSIBLE 100 /* Max no. of possibilities to generate */ |
---|
583 | |
---|
584 | EXTERN char possibilities[MAXPOSSIBLE][INPUTWORDLEN + MAXAFFIXLEN]; |
---|
585 | /* Table of possible corrections */ |
---|
586 | EXTERN int pcount; /* Count of possibilities generated */ |
---|
587 | EXTERN int maxposslen; /* Length of longest possibility */ |
---|
588 | EXTERN int easypossibilities; /* Number of "easy" corrections found */ |
---|
589 | /* ..(defined as those using legal affixes) */ |
---|
590 | |
---|
591 | /* |
---|
592 | * The following array contains a list of characters that should be tried |
---|
593 | * in "missingletter." Note that lowercase characters are omitted. |
---|
594 | */ |
---|
595 | EXTERN int Trynum; /* Size of "Try" array */ |
---|
596 | EXTERN ichar_t Try[SET_SIZE + MAXSTRINGCHARS]; |
---|
597 | |
---|
598 | /* |
---|
599 | * Initialized variables. These are generated using macros so that they |
---|
600 | * may be consistently declared in all programs. Numerous examples of |
---|
601 | * usage are given below. |
---|
602 | */ |
---|
603 | #ifdef MAIN |
---|
604 | #define INIT(decl, init) decl = init |
---|
605 | #else |
---|
606 | #define INIT(decl, init) extern decl |
---|
607 | #endif |
---|
608 | |
---|
609 | #ifdef MINIMENU |
---|
610 | INIT (int minimenusize, 2); /* MUST be either 2 or zero */ |
---|
611 | #else /* MINIMENU */ |
---|
612 | INIT (int minimenusize, 0); /* MUST be either 2 or zero */ |
---|
613 | #endif /* MINIMENU */ |
---|
614 | |
---|
615 | INIT (int eflag, 0); /* NZ for expand mode */ |
---|
616 | INIT (int dumpflag, 0); /* NZ to do dump mode */ |
---|
617 | INIT (int fflag, 0); /* NZ if -f specified */ |
---|
618 | #ifndef USG |
---|
619 | INIT (int sflag, 0); /* NZ to stop self after EOF */ |
---|
620 | #endif |
---|
621 | INIT (int vflag, 0); /* NZ to display characters as M-xxx */ |
---|
622 | INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */ |
---|
623 | INIT (int deftflag, -1); /* NZ for TeX mode by default */ |
---|
624 | INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */ |
---|
625 | INIT (int prefstringchar, -1); /* Preferred string character type */ |
---|
626 | |
---|
627 | INIT (int terse, 0); /* NZ for "terse" mode */ |
---|
628 | |
---|
629 | INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */ |
---|
630 | |
---|
631 | INIT (int minword, MINWORD); /* Longest always-legal word */ |
---|
632 | INIT (int sortit, 1); /* Sort suggestions alphabetically */ |
---|
633 | INIT (int compoundflag, -1); /* How to treat compounds: see above */ |
---|
634 | INIT (int tryhardflag, -1); /* Always call tryveryhard */ |
---|
635 | |
---|
636 | INIT (char * currentfile, NULL); /* Name of current input file */ |
---|
637 | |
---|
638 | /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */ |
---|
639 | INIT (int math_mode, 0); |
---|
640 | /* P -- paragraph or LR mode |
---|
641 | * b -- parsing a \begin statement |
---|
642 | * e -- parsing an \end statement |
---|
643 | * r -- parsing a \ref type of argument. |
---|
644 | * m -- looking for a \begin{minipage} argument. |
---|
645 | */ |
---|
646 | INIT (char LaTeX_Mode, 'P'); |
---|