1 | /* lang.c -- language-dependent support. |
---|
2 | $Id: lang.c,v 1.1.1.2 2003-02-28 17:44:41 amb Exp $ |
---|
3 | |
---|
4 | Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
---|
5 | |
---|
6 | This program is free software; you can redistribute it and/or modify |
---|
7 | it under the terms of the GNU General Public License as published by |
---|
8 | the Free Software Foundation; either version 2, or (at your option) |
---|
9 | any later version. |
---|
10 | |
---|
11 | This program is distributed in the hope that it will be useful, |
---|
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
14 | GNU General Public License for more details. |
---|
15 | |
---|
16 | You should have received a copy of the GNU General Public License |
---|
17 | along with this program; if not, write to the Free Software |
---|
18 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
---|
19 | |
---|
20 | Originally written by Karl Heinz Marbaise <kama@hippo.fido.de>. */ |
---|
21 | |
---|
22 | #include "system.h" |
---|
23 | #include "cmds.h" |
---|
24 | #include "lang.h" |
---|
25 | #include "makeinfo.h" |
---|
26 | #include "xml.h" |
---|
27 | |
---|
28 | /* Current document encoding. */ |
---|
29 | encoding_code_type document_encoding_code = no_encoding; |
---|
30 | |
---|
31 | /* Current language code; default is English. */ |
---|
32 | language_code_type language_code = en; |
---|
33 | |
---|
34 | iso_map_type us_ascii_map [] = {{NULL, 0, 0}}; /* ASCII map is trivial */ |
---|
35 | |
---|
36 | /* Translation table between HTML and ISO Codes. The last item is |
---|
37 | hopefully the Unicode. It might be possible that those Unicodes are |
---|
38 | not correct, cause I didn't check them. kama */ |
---|
39 | iso_map_type iso8859_1_map [] = { |
---|
40 | { "nbsp", 0xA0, 0x00A0 }, |
---|
41 | { "iexcl", 0xA1, 0x00A1 }, |
---|
42 | { "cent", 0xA2, 0x00A2 }, |
---|
43 | { "pound", 0xA3, 0x00A3 }, |
---|
44 | { "curren", 0xA4, 0x00A4 }, |
---|
45 | { "yen", 0xA5, 0x00A5 }, |
---|
46 | { "brkbar", 0xA6, 0x00A6 }, |
---|
47 | { "sect", 0xA7, 0x00A7 }, |
---|
48 | { "uml", 0xA8, 0x00A8 }, |
---|
49 | { "copy", 0xA9, 0x00A9 }, |
---|
50 | { "ordf", 0xAA, 0x00AA }, |
---|
51 | { "laquo", 0xAB, 0x00AB }, |
---|
52 | { "not", 0xAC, 0x00AC }, |
---|
53 | { "shy", 0xAD, 0x00AD }, |
---|
54 | { "reg", 0xAE, 0x00AE }, |
---|
55 | { "hibar", 0xAF, 0x00AF }, |
---|
56 | { "deg", 0xB0, 0x00B0 }, |
---|
57 | { "plusmn", 0xB1, 0x00B1 }, |
---|
58 | { "sup2", 0xB2, 0x00B2 }, |
---|
59 | { "sup3", 0xB3, 0x00B3 }, |
---|
60 | { "acute", 0xB4, 0x00B4 }, |
---|
61 | { "micro", 0xB5, 0x00B5 }, |
---|
62 | { "para", 0xB6, 0x00B6 }, |
---|
63 | { "middot", 0xB7, 0x00B7 }, |
---|
64 | { "cedil", 0xB8, 0x00B8 }, |
---|
65 | { "sup1", 0xB9, 0x00B9 }, |
---|
66 | { "ordm", 0xBA, 0x00BA }, |
---|
67 | { "raquo", 0xBB, 0x00BB }, |
---|
68 | { "frac14", 0xBC, 0x00BC }, |
---|
69 | { "frac12", 0xBD, 0x00BD }, |
---|
70 | { "frac34", 0xBE, 0x00BE }, |
---|
71 | { "iquest", 0xBF, 0x00BF }, |
---|
72 | { "Agrave", 0xC0, 0x00C0 }, |
---|
73 | { "Aacute", 0xC1, 0x00C1 }, |
---|
74 | { "Acirc", 0xC2, 0x00C2 }, |
---|
75 | { "Atilde", 0xC3, 0x00C3 }, |
---|
76 | { "Auml", 0xC4, 0x00C4 }, |
---|
77 | { "Aring", 0xC5, 0x00C5 }, |
---|
78 | { "AElig", 0xC6, 0x00C6 }, |
---|
79 | { "Ccedil", 0xC7, 0x00C7 }, |
---|
80 | { "Ccedil", 0xC7, 0x00C7 }, |
---|
81 | { "Egrave", 0xC8, 0x00C8 }, |
---|
82 | { "Eacute", 0xC9, 0x00C9 }, |
---|
83 | { "Ecirc", 0xCA, 0x00CA }, |
---|
84 | { "Euml", 0xCB, 0x00CB }, |
---|
85 | { "Igrave", 0xCC, 0x00CC }, |
---|
86 | { "Iacute", 0xCD, 0x00CD }, |
---|
87 | { "Icirc", 0xCE, 0x00CE }, |
---|
88 | { "Iuml", 0xCF, 0x00CF }, |
---|
89 | { "ETH", 0xD0, 0x00D0 }, /* I don't know ;-( */ |
---|
90 | { "Ntilde", 0xD1, 0x00D1 }, |
---|
91 | { "Ograve", 0xD2, 0x00D2 }, |
---|
92 | { "Oacute", 0xD3, 0x00D3 }, |
---|
93 | { "Ocirc", 0xD4, 0x00D4 }, |
---|
94 | { "Otilde", 0xD5, 0x00D5 }, |
---|
95 | { "Ouml", 0xD6, 0x00D6 }, |
---|
96 | { "times", 0xD7, 0x00D7 }, |
---|
97 | { "Oslash", 0xD8, 0x00D8 }, |
---|
98 | { "Ugrave", 0xD9, 0x00D9 }, |
---|
99 | { "Uacute", 0xDA, 0x00DA }, |
---|
100 | { "Ucirc", 0xDB, 0x00DB }, |
---|
101 | { "Uuml", 0xDC, 0x00DC }, |
---|
102 | { "Yacute", 0xDD, 0x00DD }, |
---|
103 | { "THORN", 0xDE, 0x00DE }, |
---|
104 | { "szlig", 0xDF, 0x00DF }, |
---|
105 | { "agrave", 0xE0, 0x00E0 }, |
---|
106 | { "aacute", 0xE1, 0x00E1 }, |
---|
107 | { "acirc", 0xE2, 0x00E2 }, |
---|
108 | { "atilde", 0xE3, 0x00E3 }, |
---|
109 | { "auml", 0xE4, 0x00E4 }, |
---|
110 | { "aring", 0xE5, 0x00E5 }, |
---|
111 | { "aelig", 0xE6, 0x00E6 }, |
---|
112 | { "ccedil", 0xE7, 0x00E7 }, |
---|
113 | { "egrave", 0xE8, 0x00E8 }, |
---|
114 | { "eacute", 0xE9, 0x00E9 }, |
---|
115 | { "ecirc", 0xEA, 0x00EA }, |
---|
116 | { "euml", 0xEB, 0x00EB }, |
---|
117 | { "igrave", 0xEC, 0x00EC }, |
---|
118 | { "iacute", 0xED, 0x00ED }, |
---|
119 | { "icirc", 0xEE, 0x00EE }, |
---|
120 | { "iuml", 0xEF, 0x00EF }, |
---|
121 | { "eth", 0xF0, 0x00F0 }, |
---|
122 | { "ntilde", 0xF1, 0x00F1 }, |
---|
123 | { "ograve", 0xF2, 0x00F2 }, |
---|
124 | { "oacute", 0xF3, 0x00F3 }, |
---|
125 | { "ocirc", 0xF4, 0x00F4 }, |
---|
126 | { "otilde", 0xF5, 0x00F5 }, |
---|
127 | { "ouml", 0xF6, 0x00F6 }, |
---|
128 | { "divide", 0xF7, 0x00F7 }, |
---|
129 | { "oslash", 0xF8, 0x00F8 }, |
---|
130 | { "ugrave", 0xF9, 0x00F9 }, |
---|
131 | { "uacute", 0xFA, 0x00FA }, |
---|
132 | { "ucirc", 0xFB, 0x00FB }, |
---|
133 | { "uuml", 0xFC, 0x00FC }, |
---|
134 | { "yacute", 0xFD, 0x00FD }, |
---|
135 | { "thorn", 0xFE, 0x00FE }, |
---|
136 | { "yuml", 0xFF, 0x00FF }, |
---|
137 | { NULL, 0, 0 } |
---|
138 | }; |
---|
139 | |
---|
140 | encoding_type encoding_table[] = { |
---|
141 | { no_encoding, "(no encoding)", NULL }, |
---|
142 | { US_ASCII, "US-ASCII", us_ascii_map }, |
---|
143 | { ISO_8859_1, "ISO-8859-1", (iso_map_type *) iso8859_1_map }, |
---|
144 | { ISO_8859_2, "ISO-8859-2", NULL }, |
---|
145 | { ISO_8859_3, "ISO-8859-3", NULL }, |
---|
146 | { ISO_8859_4, "ISO-8859-4", NULL }, |
---|
147 | { ISO_8859_5, "ISO-8859-5", NULL }, |
---|
148 | { ISO_8859_6, "ISO-8859-6", NULL }, |
---|
149 | { ISO_8859_7, "ISO-8859-7", NULL }, |
---|
150 | { ISO_8859_8, "ISO-8859-8", NULL }, |
---|
151 | { ISO_8859_9, "ISO-8859-9", NULL }, |
---|
152 | { ISO_8859_10, "ISO-8859-10", NULL }, |
---|
153 | { ISO_8859_11, "ISO-8859-11", NULL }, |
---|
154 | { ISO_8859_12, "ISO-8859-12", NULL }, |
---|
155 | { ISO_8859_13, "ISO-8859-13", NULL }, |
---|
156 | { ISO_8859_14, "ISO-8859-14", NULL }, |
---|
157 | { ISO_8859_15, "ISO-8859-15", NULL }, |
---|
158 | { last_encoding_code, NULL, NULL } |
---|
159 | }; |
---|
160 | |
---|
161 | |
---|
162 | language_type language_table[] = { |
---|
163 | { aa, "aa", "Afar" }, |
---|
164 | { ab, "ab", "Abkhazian" }, |
---|
165 | { af, "af", "Afrikaans" }, |
---|
166 | { am, "am", "Amharic" }, |
---|
167 | { ar, "ar", "Arabic" }, |
---|
168 | { as, "as", "Assamese" }, |
---|
169 | { ay, "ay", "Aymara" }, |
---|
170 | { az, "az", "Azerbaijani" }, |
---|
171 | { ba, "ba", "Bashkir" }, |
---|
172 | { be, "be", "Byelorussian" }, |
---|
173 | { bg, "bg", "Bulgarian" }, |
---|
174 | { bh, "bh", "Bihari" }, |
---|
175 | { bi, "bi", "Bislama" }, |
---|
176 | { bn, "bn", "Bengali; Bangla" }, |
---|
177 | { bo, "bo", "Tibetan" }, |
---|
178 | { br, "br", "Breton" }, |
---|
179 | { ca, "ca", "Catalan" }, |
---|
180 | { co, "co", "Corsican" }, |
---|
181 | { cs, "cs", "Czech" }, |
---|
182 | { cy, "cy", "Welsh" }, |
---|
183 | { da, "da", "Danish" }, |
---|
184 | { de, "de", "German" }, |
---|
185 | { dz, "dz", "Bhutani" }, |
---|
186 | { el, "el", "Greek" }, |
---|
187 | { en, "en", "English" }, |
---|
188 | { eo, "eo", "Esperanto" }, |
---|
189 | { es, "es", "Spanish" }, |
---|
190 | { et, "et", "Estonian" }, |
---|
191 | { eu, "eu", "Basque" }, |
---|
192 | { fa, "fa", "Persian" }, |
---|
193 | { fi, "fi", "Finnish" }, |
---|
194 | { fj, "fj", "Fiji" }, |
---|
195 | { fo, "fo", "Faroese" }, |
---|
196 | { fr, "fr", "French" }, |
---|
197 | { fy, "fy", "Frisian" }, |
---|
198 | { ga, "ga", "Irish" }, |
---|
199 | { gd, "gd", "Scots Gaelic" }, |
---|
200 | { gl, "gl", "Galician" }, |
---|
201 | { gn, "gn", "Guarani" }, |
---|
202 | { gu, "gu", "Gujarati" }, |
---|
203 | { ha, "ha", "Hausa" }, |
---|
204 | { he, "he", "Hebrew" } /* (formerly iw) */, |
---|
205 | { hi, "hi", "Hindi" }, |
---|
206 | { hr, "hr", "Croatian" }, |
---|
207 | { hu, "hu", "Hungarian" }, |
---|
208 | { hy, "hy", "Armenian" }, |
---|
209 | { ia, "ia", "Interlingua" }, |
---|
210 | { id, "id", "Indonesian" } /* (formerly in) */, |
---|
211 | { ie, "ie", "Interlingue" }, |
---|
212 | { ik, "ik", "Inupiak" }, |
---|
213 | { is, "is", "Icelandic" }, |
---|
214 | { it, "it", "Italian" }, |
---|
215 | { iu, "iu", "Inuktitut" }, |
---|
216 | { ja, "ja", "Japanese" }, |
---|
217 | { jw, "jw", "Javanese" }, |
---|
218 | { ka, "ka", "Georgian" }, |
---|
219 | { kk, "kk", "Kazakh" }, |
---|
220 | { kl, "kl", "Greenlandic" }, |
---|
221 | { km, "km", "Cambodian" }, |
---|
222 | { kn, "kn", "Kannada" }, |
---|
223 | { ko, "ko", "Korean" }, |
---|
224 | { ks, "ks", "Kashmiri" }, |
---|
225 | { ku, "ku", "Kurdish" }, |
---|
226 | { ky, "ky", "Kirghiz" }, |
---|
227 | { la, "la", "Latin" }, |
---|
228 | { ln, "ln", "Lingala" }, |
---|
229 | { lo, "lo", "Laothian" }, |
---|
230 | { lt, "lt", "Lithuanian" }, |
---|
231 | { lv, "lv", "Latvian, Lettish" }, |
---|
232 | { mg, "mg", "Malagasy" }, |
---|
233 | { mi, "mi", "Maori" }, |
---|
234 | { mk, "mk", "Macedonian" }, |
---|
235 | { ml, "ml", "Malayalam" }, |
---|
236 | { mn, "mn", "Mongolian" }, |
---|
237 | { mo, "mo", "Moldavian" }, |
---|
238 | { mr, "mr", "Marathi" }, |
---|
239 | { ms, "ms", "Malay" }, |
---|
240 | { mt, "mt", "Maltese" }, |
---|
241 | { my, "my", "Burmese" }, |
---|
242 | { na, "na", "Nauru" }, |
---|
243 | { ne, "ne", "Nepali" }, |
---|
244 | { nl, "nl", "Dutch" }, |
---|
245 | { no, "no", "Norwegian" }, |
---|
246 | { oc, "oc", "Occitan" }, |
---|
247 | { om, "om", "(Afan) Oromo" }, |
---|
248 | { or, "or", "Oriya" }, |
---|
249 | { pa, "pa", "Punjabi" }, |
---|
250 | { pl, "pl", "Polish" }, |
---|
251 | { ps, "ps", "Pashto, Pushto" }, |
---|
252 | { pt, "pt", "Portuguese" }, |
---|
253 | { qu, "qu", "Quechua" }, |
---|
254 | { rm, "rm", "Rhaeto-Romance" }, |
---|
255 | { rn, "rn", "Kirundi" }, |
---|
256 | { ro, "ro", "Romanian" }, |
---|
257 | { ru, "ru", "Russian" }, |
---|
258 | { rw, "rw", "Kinyarwanda" }, |
---|
259 | { sa, "sa", "Sanskrit" }, |
---|
260 | { sd, "sd", "Sindhi" }, |
---|
261 | { sg, "sg", "Sangro" }, |
---|
262 | { sh, "sh", "Serbo-Croatian" }, |
---|
263 | { si, "si", "Sinhalese" }, |
---|
264 | { sk, "sk", "Slovak" }, |
---|
265 | { sl, "sl", "Slovenian" }, |
---|
266 | { sm, "sm", "Samoan" }, |
---|
267 | { sn, "sn", "Shona" }, |
---|
268 | { so, "so", "Somali" }, |
---|
269 | { sq, "sq", "Albanian" }, |
---|
270 | { sr, "sr", "Serbian" }, |
---|
271 | { ss, "ss", "Siswati" }, |
---|
272 | { st, "st", "Sesotho" }, |
---|
273 | { su, "su", "Sundanese" }, |
---|
274 | { sv, "sv", "Swedish" }, |
---|
275 | { sw, "sw", "Swahili" }, |
---|
276 | { ta, "ta", "Tamil" }, |
---|
277 | { te, "te", "Telugu" }, |
---|
278 | { tg, "tg", "Tajik" }, |
---|
279 | { th, "th", "Thai" }, |
---|
280 | { ti, "ti", "Tigrinya" }, |
---|
281 | { tk, "tk", "Turkmen" }, |
---|
282 | { tl, "tl", "Tagalog" }, |
---|
283 | { tn, "tn", "Setswana" }, |
---|
284 | { to, "to", "Tonga" }, |
---|
285 | { tr, "tr", "Turkish" }, |
---|
286 | { ts, "ts", "Tsonga" }, |
---|
287 | { tt, "tt", "Tatar" }, |
---|
288 | { tw, "tw", "Twi" }, |
---|
289 | { ug, "ug", "Uighur" }, |
---|
290 | { uk, "uk", "Ukrainian" }, |
---|
291 | { ur, "ur", "Urdu" }, |
---|
292 | { uz, "uz", "Uzbek" }, |
---|
293 | { vi, "vi", "Vietnamese" }, |
---|
294 | { vo, "vo", "Volapuk" }, |
---|
295 | { wo, "wo", "Wolof" }, |
---|
296 | { xh, "xh", "Xhosa" }, |
---|
297 | { yi, "yi", "Yiddish" } /* (formerly ji) */, |
---|
298 | { yo, "yo", "Yoruba" }, |
---|
299 | { za, "za", "Zhuang" }, |
---|
300 | { zh, "zh", "Chinese" }, |
---|
301 | { zu, "zu", "Zulu" }, |
---|
302 | { last_language_code, NULL, NULL } |
---|
303 | }; |
---|
304 | |
---|
305 | |
---|
306 | |
---|
307 | /* @documentlanguage. Maybe we'll do something useful with this in the |
---|
308 | future. For now, we just recognize it. */ |
---|
309 | void |
---|
310 | cm_documentlanguage () |
---|
311 | { |
---|
312 | language_code_type c; |
---|
313 | char *lang_arg; |
---|
314 | |
---|
315 | /* Read the line with the language code on it. */ |
---|
316 | get_rest_of_line (0, &lang_arg); |
---|
317 | |
---|
318 | /* Linear search is fine these days. */ |
---|
319 | for (c = aa; c != last_language_code; c++) |
---|
320 | { |
---|
321 | if (strcmp (lang_arg, language_table[c].abbrev) == 0) |
---|
322 | { /* Set current language code. */ |
---|
323 | language_code = c; |
---|
324 | break; |
---|
325 | } |
---|
326 | } |
---|
327 | |
---|
328 | /* If we didn't find this code, complain. */ |
---|
329 | if (c == last_language_code) |
---|
330 | warning (_("%s is not a valid ISO 639 language code"), lang_arg); |
---|
331 | |
---|
332 | free (lang_arg); |
---|
333 | } |
---|
334 | |
---|
335 | |
---|
336 | |
---|
337 | /* Search through the encoding table for the given character, returning |
---|
338 | its equivalent. */ |
---|
339 | |
---|
340 | static int |
---|
341 | cm_search_iso_map (html) |
---|
342 | char *html; |
---|
343 | { |
---|
344 | int i; |
---|
345 | iso_map_type *iso = encoding_table[document_encoding_code].isotab; |
---|
346 | |
---|
347 | /* If no conversion table for this encoding, quit. */ |
---|
348 | if (!iso) |
---|
349 | return -1; |
---|
350 | |
---|
351 | for (i = 0; iso[i].html; i++) |
---|
352 | { |
---|
353 | if (strcmp (html, iso[i].html) == 0) |
---|
354 | return i; |
---|
355 | } |
---|
356 | |
---|
357 | return -1; |
---|
358 | } |
---|
359 | |
---|
360 | |
---|
361 | /* @documentencoding. Set the translation table. */ |
---|
362 | |
---|
363 | void |
---|
364 | cm_documentencoding () |
---|
365 | { |
---|
366 | encoding_code_type enc; |
---|
367 | char *enc_arg; |
---|
368 | |
---|
369 | get_rest_of_line (1, &enc_arg); |
---|
370 | |
---|
371 | /* See if we have this encoding. */ |
---|
372 | for (enc = no_encoding+1; enc != last_encoding_code; enc++) |
---|
373 | { |
---|
374 | if (strcasecmp (enc_arg, encoding_table[enc].ecname) == 0) |
---|
375 | { |
---|
376 | document_encoding_code = enc; |
---|
377 | break; |
---|
378 | } |
---|
379 | } |
---|
380 | |
---|
381 | /* If we didn't find this code, complain. */ |
---|
382 | if (enc == last_encoding_code) |
---|
383 | warning (_("unrecogized encoding name `%s'"), enc_arg); |
---|
384 | |
---|
385 | else if (encoding_table[document_encoding_code].isotab == NULL) |
---|
386 | warning (_("sorry, encoding `%s' not supported"), enc_arg); |
---|
387 | |
---|
388 | free (enc_arg); |
---|
389 | } |
---|
390 | |
---|
391 | |
---|
392 | /* If html or xml output, add HTML_STR to the output. If not html and |
---|
393 | the user requested encoded output, add the real 8-bit character |
---|
394 | corresponding to HTML_STR from the translation tables. Otherwise, |
---|
395 | add INFO_STR. */ |
---|
396 | |
---|
397 | void |
---|
398 | add_encoded_char (html_str, info_str) |
---|
399 | char *html_str; |
---|
400 | char *info_str; |
---|
401 | { |
---|
402 | if (html) |
---|
403 | add_word_args ("&%s;", html_str); |
---|
404 | else if (xml) |
---|
405 | xml_insert_entity (html_str); |
---|
406 | else if (enable_encoding) |
---|
407 | { |
---|
408 | /* Look for HTML_STR in the current translation table. */ |
---|
409 | int rc = cm_search_iso_map (html_str); |
---|
410 | if (rc >= 0) |
---|
411 | /* We found it, add the real character. */ |
---|
412 | add_char (encoding_table[document_encoding_code].isotab[rc].bytecode); |
---|
413 | else |
---|
414 | { /* We didn't find it, that seems bad. */ |
---|
415 | warning (_("invalid encoded character `%s'"), html_str); |
---|
416 | add_word (info_str); |
---|
417 | } |
---|
418 | } |
---|
419 | else |
---|
420 | add_word (info_str); |
---|
421 | } |
---|
422 | |
---|
423 | |
---|
424 | |
---|
425 | /* Output an accent for HTML or XML. */ |
---|
426 | |
---|
427 | static void |
---|
428 | cm_accent_generic_html (arg, start, end, html_supported, single, |
---|
429 | html_solo_standalone, html_solo) |
---|
430 | int arg, start, end; |
---|
431 | char *html_supported; |
---|
432 | int single; |
---|
433 | int html_solo_standalone; |
---|
434 | char *html_solo; |
---|
435 | { |
---|
436 | static int valid_html_accent; /* yikes */ |
---|
437 | |
---|
438 | if (arg == START) |
---|
439 | { /* If HTML has good support for this character, use it. */ |
---|
440 | if (strchr (html_supported, curchar ())) |
---|
441 | { /* Yes; start with an ampersand. The character itself |
---|
442 | will be added later in read_command (makeinfo.c). */ |
---|
443 | int saved_escape_html = escape_html; |
---|
444 | escape_html = 0; |
---|
445 | valid_html_accent = 1; |
---|
446 | add_char ('&'); |
---|
447 | escape_html = saved_escape_html; |
---|
448 | } |
---|
449 | else |
---|
450 | { |
---|
451 | valid_html_accent = 0; |
---|
452 | if (html_solo_standalone) |
---|
453 | { /* No special HTML support, so produce standalone char. */ |
---|
454 | if (xml) |
---|
455 | xml_insert_entity (html_solo); |
---|
456 | else |
---|
457 | add_word_args ("&%s;", html_solo); |
---|
458 | } |
---|
459 | else |
---|
460 | /* If the html_solo does not exist as standalone character |
---|
461 | (namely ˆ ` ˜), then we use |
---|
462 | the single character version instead. */ |
---|
463 | add_char (single); |
---|
464 | } |
---|
465 | } |
---|
466 | else if (arg == END) |
---|
467 | { /* Only if we saw a valid_html_accent can we use the full |
---|
468 | HTML accent (umlaut, grave ...). */ |
---|
469 | if (valid_html_accent) |
---|
470 | { |
---|
471 | add_word (html_solo); |
---|
472 | add_char (';'); |
---|
473 | } |
---|
474 | } |
---|
475 | } |
---|
476 | |
---|
477 | |
---|
478 | static void |
---|
479 | cm_accent_generic_no_headers (arg, start, end, single, html_solo) |
---|
480 | int arg, start, end; |
---|
481 | int single; |
---|
482 | char *html_solo; |
---|
483 | { |
---|
484 | if (arg == END) |
---|
485 | { |
---|
486 | if (no_encoding) |
---|
487 | add_char (single); |
---|
488 | else |
---|
489 | { |
---|
490 | int rc; |
---|
491 | char *buffer = xmalloc (1 + strlen (html_solo) + 1); |
---|
492 | buffer[0] = output_paragraph[end - 1]; |
---|
493 | buffer[1] = 0; |
---|
494 | strcat (buffer, html_solo); |
---|
495 | |
---|
496 | rc = cm_search_iso_map (buffer); |
---|
497 | if (rc >= 0) |
---|
498 | /* A little bit tricky ;-) |
---|
499 | Here we replace the character which has |
---|
500 | been inserted in read_command with |
---|
501 | the value we have found in converting table |
---|
502 | Does there exist a better way to do this? kama. */ |
---|
503 | output_paragraph[end - 1] |
---|
504 | = encoding_table[document_encoding_code].isotab[rc].bytecode; |
---|
505 | else |
---|
506 | { /* If we didn't find a translation for this character, |
---|
507 | put the single instead. E.g., &Xuml; does not exist so X¨ |
---|
508 | should be produced. */ |
---|
509 | warning (_("%s is an invalid ISO code, using %c"), |
---|
510 | buffer, single); |
---|
511 | add_char (single); |
---|
512 | } |
---|
513 | |
---|
514 | free (buffer); |
---|
515 | } |
---|
516 | } |
---|
517 | } |
---|
518 | |
---|
519 | |
---|
520 | |
---|
521 | /* Accent commands that take explicit arguments and don't have any |
---|
522 | special HTML support. */ |
---|
523 | |
---|
524 | void |
---|
525 | cm_accent (arg) |
---|
526 | int arg; |
---|
527 | { |
---|
528 | int old_escape_html = escape_html; |
---|
529 | escape_html = 0; |
---|
530 | if (arg == START) |
---|
531 | { |
---|
532 | /* Must come first to avoid ambiguity with overdot. */ |
---|
533 | if (strcmp (command, "udotaccent") == 0) /* underdot */ |
---|
534 | add_char ('.'); |
---|
535 | } |
---|
536 | else if (arg == END) |
---|
537 | { |
---|
538 | if (strcmp (command, "=") == 0) /* macron */ |
---|
539 | add_word ((html || xml) ? "¯" : "="); |
---|
540 | else if (strcmp (command, "H") == 0) /* Hungarian umlaut */ |
---|
541 | add_word ("''"); |
---|
542 | else if (strcmp (command, "dotaccent") == 0) /* overdot */ |
---|
543 | add_meta_char ('.'); |
---|
544 | else if (strcmp (command, "ringaccent") == 0) /* ring */ |
---|
545 | add_char ('*'); |
---|
546 | else if (strcmp (command, "tieaccent") == 0) /* long tie */ |
---|
547 | add_char ('['); |
---|
548 | else if (strcmp (command, "u") == 0) /* breve */ |
---|
549 | add_char ('('); |
---|
550 | else if (strcmp (command, "ubaraccent") == 0) /* underbar */ |
---|
551 | add_char ('_'); |
---|
552 | else if (strcmp (command, "v") == 0) /* hacek/check */ |
---|
553 | add_word ((html || xml) ? "<" : "<"); |
---|
554 | } |
---|
555 | escape_html = old_escape_html; |
---|
556 | } |
---|
557 | |
---|
558 | /* Common routine for the accent characters that have support in HTML. |
---|
559 | If the character being accented is in the HTML_SUPPORTED set, then |
---|
560 | produce &CHTML_SOLO;, for example, Ä for an A-umlaut. If not in |
---|
561 | HTML_SUPPORTED, just produce &HTML_SOLO;X for the best we can do with |
---|
562 | at an X-umlaut. If not producing HTML, just use SINGLE, a |
---|
563 | character such as " which is the best plain text representation we |
---|
564 | can manage. If HTML_SOLO_STANDALONE is nonzero the given HTML_SOLO |
---|
565 | exists as valid standalone character in HTML, e.g., ¨. */ |
---|
566 | |
---|
567 | static void |
---|
568 | cm_accent_generic (arg, start, end, html_supported, single, |
---|
569 | html_solo_standalone, html_solo) |
---|
570 | int arg, start, end; |
---|
571 | char *html_supported; |
---|
572 | int single; |
---|
573 | int html_solo_standalone; |
---|
574 | char *html_solo; |
---|
575 | { |
---|
576 | if (html || xml) |
---|
577 | cm_accent_generic_html (arg, start, end, html_supported, |
---|
578 | single, html_solo_standalone, html_solo); |
---|
579 | else if (no_headers) |
---|
580 | cm_accent_generic_no_headers (arg, start, end, single, html_solo); |
---|
581 | else if (arg == END) |
---|
582 | { |
---|
583 | if (enable_encoding) |
---|
584 | /* use 8-bit if available */ |
---|
585 | cm_accent_generic_no_headers (arg, start, end, single, html_solo); |
---|
586 | else |
---|
587 | /* use regular character */ |
---|
588 | add_char (single); |
---|
589 | } |
---|
590 | } |
---|
591 | |
---|
/* Umlaut accent: entity suffix "uml", which is also valid as a
   standalone entity; the plain-text fallback is a double quote.  */
void
cm_accent_umlaut (arg, start, end)
     int arg, start, end;
{
  /* Letters for which a combined umlaut entity is produced.  */
  char *combinable = "aouAOUEeIiy";

  cm_accent_generic (arg, start, end, combinable, '"', 1, "uml");
}
---|
598 | |
---|
/* Acute accent: entity suffix "acute", which is also valid as a
   standalone entity; the plain-text fallback is an apostrophe.  */
void
cm_accent_acute (arg, start, end)
     int arg, start, end;
{
  /* Letters for which a combined acute entity is produced.  */
  char *combinable = "AEIOUYaeiouy";

  cm_accent_generic (arg, start, end, combinable, '\'', 1, "acute");
}
---|
605 | |
---|
/* Cedilla: entity suffix "cedil", which is also valid as a standalone
   entity; the plain-text fallback is a comma.  */
void
cm_accent_cedilla (arg, start, end)
     int arg, start, end;
{
  /* Letters for which a combined cedilla entity is produced.  */
  char *combinable = "Cc";

  cm_accent_generic (arg, start, end, combinable, ',', 1, "cedil");
}
---|
612 | |
---|
/* Circumflex: entity suffix "circ", which is not used as a standalone
   entity here; the fallback is a caret.  */
void
cm_accent_hat (arg, start, end)
     int arg, start, end;
{
  /* Letters for which a combined circumflex entity is produced.  */
  char *combinable = "AEIOUaeiou";

  cm_accent_generic (arg, start, end, combinable, '^', 0, "circ");
}
---|
619 | |
---|
/* Grave accent: entity suffix "grave", which is not used as a
   standalone entity here; the fallback is a backquote.  */
void
cm_accent_grave (arg, start, end)
     int arg, start, end;
{
  /* Letters for which a combined grave entity is produced.  */
  char *combinable = "AEIOUaeiou";

  cm_accent_generic (arg, start, end, combinable, '`', 0, "grave");
}
---|
626 | |
---|
/* Tilde accent: entity suffix "tilde", which is not used as a
   standalone entity here; the fallback is a tilde character.  */
void
cm_accent_tilde (arg, start, end)
     int arg, start, end;
{
  /* Letters for which a combined tilde entity is produced.  */
  char *combinable = "ANOano";

  cm_accent_generic (arg, start, end, combinable, '~', 0, "tilde");
}
---|
633 | |
---|
634 | |
---|
635 | |
---|
636 | /* Non-English letters/characters that don't insert themselves. */ |
---|
637 | void |
---|
638 | cm_special_char (arg) |
---|
639 | { |
---|
640 | int old_escape_html = escape_html; |
---|
641 | escape_html = 0; |
---|
642 | |
---|
643 | if (arg == START) |
---|
644 | { |
---|
645 | if ((*command == 'L' || *command == 'l' |
---|
646 | || *command == 'O' || *command == 'o') |
---|
647 | && command[1] == 0) |
---|
648 | { /* Lslash lslash Oslash oslash. |
---|
649 | Lslash and lslash aren't supported in HTML. */ |
---|
650 | if ((html || xml) && command[0] == 'O') |
---|
651 | add_encoded_char ("Oslash", "/O"); |
---|
652 | else if ((html || xml) && command[0] == 'o') |
---|
653 | add_encoded_char ("oslash", "/o"); |
---|
654 | else |
---|
655 | add_word_args ("/%c", command[0]); |
---|
656 | } |
---|
657 | else if (strcmp (command, "exclamdown") == 0) |
---|
658 | add_encoded_char ("iexcl", "!"); |
---|
659 | else if (strcmp (command, "pounds") == 0) |
---|
660 | add_encoded_char ("pound" , "#"); |
---|
661 | else if (strcmp (command, "questiondown") == 0) |
---|
662 | add_encoded_char ("iquest", "?"); |
---|
663 | else if (strcmp (command, "AE") == 0) |
---|
664 | add_encoded_char ("AElig", command); |
---|
665 | else if (strcmp (command, "ae") == 0) |
---|
666 | add_encoded_char ("aelig", command); |
---|
667 | else if (strcmp (command, "OE") == 0) |
---|
668 | add_word ("Œ", command); |
---|
669 | else if (strcmp (command, "oe") == 0) |
---|
670 | add_word ("œ", command); |
---|
671 | else if (strcmp (command, "AA") == 0) |
---|
672 | add_encoded_char ("Aring", command); |
---|
673 | else if (strcmp (command, "aa") == 0) |
---|
674 | add_encoded_char ("aring", command); |
---|
675 | else if (strcmp (command, "ss") == 0) |
---|
676 | add_encoded_char ("szlig", command); |
---|
677 | else |
---|
678 | line_error ("cm_special_char internal error: command=@%s", command); |
---|
679 | } |
---|
680 | escape_html = old_escape_html; |
---|
681 | } |
---|
682 | |
---|
683 | /* Dotless i or j. */ |
---|
684 | void |
---|
685 | cm_dotless (arg, start, end) |
---|
686 | int arg, start, end; |
---|
687 | { |
---|
688 | if (arg == END) |
---|
689 | { |
---|
690 | xml_no_para --; |
---|
691 | if (output_paragraph[start] != 'i' && output_paragraph[start] != 'j') |
---|
692 | /* This error message isn't perfect if the argument is multiple |
---|
693 | characters, but it doesn't seem worth getting right. */ |
---|
694 | line_error (_("%c%s expects `i' or `j' as argument, not `%c'"), |
---|
695 | COMMAND_PREFIX, command, output_paragraph[start]); |
---|
696 | |
---|
697 | else if (end - start != 1) |
---|
698 | line_error (_("%c%s expects a single character `i' or `j' as argument"), |
---|
699 | COMMAND_PREFIX, command); |
---|
700 | |
---|
701 | /* We've already inserted the `i' or `j', so nothing to do. */ |
---|
702 | } |
---|
703 | else |
---|
704 | xml_no_para ++; |
---|
705 | } |
---|