1 | /* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 8; tab-width: 8 -*- */ |
---|
2 | /* htmlentity.c |
---|
3 | * |
---|
4 | * Copyright (C) 1999 Helix Code, Inc. |
---|
5 | * |
---|
6 | * This program is free software; you can redistribute it and/or |
---|
7 | * modify it under the terms of the GNU General Public License as |
---|
8 | * published by the Free Software Foundation; either version 2 of the |
---|
9 | * License, or (at your option) any later version. |
---|
10 | * |
---|
11 | * This program is distributed in the hope that it will be useful, |
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
14 | * General Public License for more details. |
---|
15 | * |
---|
16 | * You should have received a copy of the GNU General Public |
---|
17 | * License along with this program; if not, write to the |
---|
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
---|
19 | * Boston, MA 02111-1307, USA. |
---|
20 | * |
---|
21 | * Author: Ettore Perazzoli |
---|
22 | */ |
---|
23 | |
---|
24 | #include <config.h> |
---|
25 | #include <string.h> |
---|
26 | #include <stdlib.h> |
---|
27 | #include "gtkhtml-compat.h" |
---|
28 | |
---|
29 | #include <glib.h> |
---|
30 | #include "htmlentity.h" |
---|
31 | |
---|
32 | |
---|
33 | struct _EntityEntry { |
---|
34 | guint value; |
---|
35 | const gchar *str; |
---|
36 | }; |
---|
37 | typedef struct _EntityEntry EntityEntry; |
---|
38 | |
---|
39 | static EntityEntry entity_table[] = { |
---|
40 | |
---|
41 | /* Latin1 */ |
---|
42 | { 160, "nbsp" }, |
---|
43 | { 161, "iexcl" }, |
---|
44 | { 162, "cent" }, |
---|
45 | { 163, "pound" }, |
---|
46 | { 164, "curren" }, |
---|
47 | { 165, "yen" }, |
---|
48 | { 166, "brvbar" }, |
---|
49 | { 167, "sect" }, |
---|
50 | { 168, "uml" }, |
---|
51 | { 169, "copy" }, |
---|
52 | { 170, "ordf" }, |
---|
53 | { 171, "laquo" }, |
---|
54 | { 172, "not" }, |
---|
55 | { 173, "shy" }, |
---|
56 | { 174, "reg" }, |
---|
57 | { 175, "macr" }, |
---|
58 | { 176, "deg" }, |
---|
59 | { 177, "plusmn" }, |
---|
60 | { 178, "sup2" }, |
---|
61 | { 179, "sup3" }, |
---|
62 | { 180, "acute" }, |
---|
63 | { 181, "micro" }, |
---|
64 | { 182, "para" }, |
---|
65 | { 183, "middot" }, |
---|
66 | { 184, "cedil" }, |
---|
67 | { 185, "sup1" }, |
---|
68 | { 186, "ordm" }, |
---|
69 | { 187, "raquo" }, |
---|
70 | { 188, "frac14" }, |
---|
71 | { 189, "frac12" }, |
---|
72 | { 190, "frac34" }, |
---|
73 | { 191, "iquest" }, |
---|
74 | { 192, "Agrave" }, |
---|
75 | { 193, "Aacute" }, |
---|
76 | { 194, "Acirc" }, |
---|
77 | { 195, "Atilde" }, |
---|
78 | { 196, "Auml" }, |
---|
79 | { 197, "Aring" }, |
---|
80 | { 198, "AElig" }, |
---|
81 | { 199, "Ccedil" }, |
---|
82 | { 200, "Egrave" }, |
---|
83 | { 201, "Eacute" }, |
---|
84 | { 202, "Ecirc" }, |
---|
85 | { 203, "Euml" }, |
---|
86 | { 204, "Igrave" }, |
---|
87 | { 205, "Iacute" }, |
---|
88 | { 206, "Icirc" }, |
---|
89 | { 207, "Iuml" }, |
---|
90 | { 208, "ETH" }, |
---|
91 | { 209, "Ntilde" }, |
---|
92 | { 210, "Ograve" }, |
---|
93 | { 211, "Oacute" }, |
---|
94 | { 212, "Ocirc" }, |
---|
95 | { 213, "Otilde" }, |
---|
96 | { 214, "Ouml" }, |
---|
97 | { 215, "times" }, |
---|
98 | { 216, "Oslash" }, |
---|
99 | { 217, "Ugrave" }, |
---|
100 | { 218, "Uacute" }, |
---|
101 | { 219, "Ucirc" }, |
---|
102 | { 220, "Uuml" }, |
---|
103 | { 221, "Yacute" }, |
---|
104 | { 222, "THORN" }, |
---|
105 | { 223, "szlig" }, |
---|
106 | { 224, "agrave" }, |
---|
107 | { 225, "aacute" }, |
---|
108 | { 226, "acirc" }, |
---|
109 | { 227, "atilde" }, |
---|
110 | { 228, "auml" }, |
---|
111 | { 229, "aring" }, |
---|
112 | { 230, "aelig" }, |
---|
113 | { 231, "ccedil" }, |
---|
114 | { 232, "egrave" }, |
---|
115 | { 233, "eacute" }, |
---|
116 | { 234, "ecirc" }, |
---|
117 | { 235, "euml" }, |
---|
118 | { 236, "igrave" }, |
---|
119 | { 237, "iacute" }, |
---|
120 | { 238, "icirc" }, |
---|
121 | { 239, "iuml" }, |
---|
122 | { 240, "eth" }, |
---|
123 | { 241, "ntilde" }, |
---|
124 | { 242, "ograve" }, |
---|
125 | { 243, "oacute" }, |
---|
126 | { 244, "ocirc" }, |
---|
127 | { 245, "otilde" }, |
---|
128 | { 246, "ouml" }, |
---|
129 | { 247, "divide" }, |
---|
130 | { 248, "oslash" }, |
---|
131 | { 249, "ugrave" }, |
---|
132 | { 250, "uacute" }, |
---|
133 | { 251, "ucirc" }, |
---|
134 | { 252, "uuml" }, |
---|
135 | { 253, "yacute" }, |
---|
136 | { 254, "thorn" }, |
---|
137 | { 255, "yuml" }, |
---|
138 | |
---|
139 | /* special charactes */ |
---|
140 | { 34, "quot" }, |
---|
141 | { 38, "amp" }, |
---|
142 | { 39, "apos" }, |
---|
143 | { 60, "lt" }, |
---|
144 | { 62, "gt" }, |
---|
145 | { 338, "OElig" }, |
---|
146 | { 339, "oelig" }, |
---|
147 | { 352, "Scaron" }, |
---|
148 | { 353, "scaron" }, |
---|
149 | { 376, "Yuml" }, |
---|
150 | { 710, "circ" }, |
---|
151 | { 732, "tilde" }, |
---|
152 | { 8194, "ensp" }, |
---|
153 | { 8195, "emsp" }, |
---|
154 | { 8201, "thinsp" }, |
---|
155 | { 8204, "zwnj" }, |
---|
156 | { 8205, "zwj" }, |
---|
157 | { 8206, "lrm" }, |
---|
158 | { 8207, "rlm" }, |
---|
159 | { 8211, "ndash" }, |
---|
160 | { 8212, "mdash" }, |
---|
161 | { 8216, "lsquo" }, |
---|
162 | { 8217, "rsquo" }, |
---|
163 | { 8218, "sbquo" }, |
---|
164 | { 8220, "ldquo" }, |
---|
165 | { 8221, "rdquo" }, |
---|
166 | { 8222, "bdquo" }, |
---|
167 | { 8224, "dagger" }, |
---|
168 | { 8225, "Dagger" }, |
---|
169 | { 8240, "permil" }, |
---|
170 | { 8249, "lsaquo" }, |
---|
171 | { 8250, "rsaquo" }, |
---|
172 | { 8364, "euro" }, |
---|
173 | |
---|
174 | /* symbols */ |
---|
175 | { 402, "fnof" }, |
---|
176 | { 913, "Alpha" }, |
---|
177 | { 914, "Beta" }, |
---|
178 | { 915, "Gamma" }, |
---|
179 | { 916, "Delta" }, |
---|
180 | { 917, "Epsilon" }, |
---|
181 | { 918, "Zeta" }, |
---|
182 | { 919, "Eta" }, |
---|
183 | { 920, "Theta" }, |
---|
184 | { 921, "Iota" }, |
---|
185 | { 922, "Kappa" }, |
---|
186 | { 923, "Lambda" }, |
---|
187 | { 924, "Mu" }, |
---|
188 | { 925, "Nu" }, |
---|
189 | { 926, "Xi" }, |
---|
190 | { 927, "Omicron" }, |
---|
191 | { 928, "Pi" }, |
---|
192 | { 929, "Rho" }, |
---|
193 | { 931, "Sigma" }, |
---|
194 | { 932, "Tau" }, |
---|
195 | { 933, "Upsilon" }, |
---|
196 | { 934, "Phi" }, |
---|
197 | { 935, "Chi" }, |
---|
198 | { 936, "Psi" }, |
---|
199 | { 937, "Omega" }, |
---|
200 | { 945, "alpha" }, |
---|
201 | { 946, "beta" }, |
---|
202 | { 947, "gamma" }, |
---|
203 | { 948, "delta" }, |
---|
204 | { 949, "epsilon" }, |
---|
205 | { 950, "zeta" }, |
---|
206 | { 951, "eta" }, |
---|
207 | { 952, "theta" }, |
---|
208 | { 953, "iota" }, |
---|
209 | { 954, "kappa" }, |
---|
210 | { 955, "lambda" }, |
---|
211 | { 956, "mu" }, |
---|
212 | { 957, "nu" }, |
---|
213 | { 958, "xi" }, |
---|
214 | { 959, "omicron" }, |
---|
215 | { 960, "pi" }, |
---|
216 | { 961, "rho" }, |
---|
217 | { 962, "sigmaf" }, |
---|
218 | { 963, "sigma" }, |
---|
219 | { 964, "tau" }, |
---|
220 | { 965, "upsilon" }, |
---|
221 | { 966, "phi" }, |
---|
222 | { 967, "chi" }, |
---|
223 | { 968, "psi" }, |
---|
224 | { 969, "omega" }, |
---|
225 | { 977, "thetasym" }, |
---|
226 | { 978, "upsih" }, |
---|
227 | { 982, "piv" }, |
---|
228 | { 8226, "bull" }, |
---|
229 | { 8230, "hellip" }, |
---|
230 | { 8242, "prime" }, |
---|
231 | { 8243, "Prime" }, |
---|
232 | { 8254, "oline" }, |
---|
233 | { 8260, "frasl" }, |
---|
234 | { 8472, "weierp" }, |
---|
235 | { 8465, "image" }, |
---|
236 | { 8476, "real" }, |
---|
237 | { 8482, "trade" }, |
---|
238 | { 8501, "alefsym" }, |
---|
239 | { 8592, "larr" }, |
---|
240 | { 8593, "uarr" }, |
---|
241 | { 8594, "rarr" }, |
---|
242 | { 8595, "darr" }, |
---|
243 | { 8596, "harr" }, |
---|
244 | { 8629, "crarr" }, |
---|
245 | { 8656, "lArr" }, |
---|
246 | { 8657, "uArr" }, |
---|
247 | { 8658, "rArr" }, |
---|
248 | { 8659, "dArr" }, |
---|
249 | { 8660, "hArr" }, |
---|
250 | { 8704, "forall" }, |
---|
251 | { 8706, "part" }, |
---|
252 | { 8707, "exist" }, |
---|
253 | { 8709, "empty" }, |
---|
254 | { 8711, "nabla" }, |
---|
255 | { 8712, "isin" }, |
---|
256 | { 8713, "notin" }, |
---|
257 | { 8715, "ni" }, |
---|
258 | { 8719, "prod" }, |
---|
259 | { 8721, "sum" }, |
---|
260 | { 8722, "minus" }, |
---|
261 | { 8727, "lowast" }, |
---|
262 | { 8730, "radic" }, |
---|
263 | { 8733, "prop" }, |
---|
264 | { 8734, "infin" }, |
---|
265 | { 8736, "ang" }, |
---|
266 | { 8743, "and" }, |
---|
267 | { 8744, "or" }, |
---|
268 | { 8745, "cap" }, |
---|
269 | { 8746, "cup" }, |
---|
270 | { 8747, "int" }, |
---|
271 | { 8756, "there4" }, |
---|
272 | { 8764, "sim" }, |
---|
273 | { 8773, "cong" }, |
---|
274 | { 8776, "asymp" }, |
---|
275 | { 8800, "ne" }, |
---|
276 | { 8801, "equiv" }, |
---|
277 | { 8804, "le" }, |
---|
278 | { 8805, "ge" }, |
---|
279 | { 8834, "sub" }, |
---|
280 | { 8835, "sup" }, |
---|
281 | { 8836, "nsub" }, |
---|
282 | { 8838, "sube" }, |
---|
283 | { 8839, "supe" }, |
---|
284 | { 8853, "oplus" }, |
---|
285 | { 8855, "otimes" }, |
---|
286 | { 8869, "perp" }, |
---|
287 | { 8901, "sdot" }, |
---|
288 | { 8968, "lceil" }, |
---|
289 | { 8969, "rceil" }, |
---|
290 | { 8970, "lfloor" }, |
---|
291 | { 8971, "rfloor" }, |
---|
292 | { 9001, "lang" }, |
---|
293 | { 9002, "rang" }, |
---|
294 | { 9674, "loz" }, |
---|
295 | { 9824, "spades" }, |
---|
296 | { 9827, "clubs" }, |
---|
297 | { 9829, "hearts" }, |
---|
298 | { 9830, "diams" }, |
---|
299 | }; |
---|
300 | |
---|
301 | |
---|
302 | /* FIXME FIXME this function just sucks. We should use gperf or something instead. */ |
---|
303 | |
---|
304 | static gint |
---|
305 | html_g_str_case_equal (gconstpointer v, gconstpointer v2) |
---|
306 | { |
---|
307 | return strcasecmp ((const gchar*) v, (const gchar*)v2) == 0; |
---|
308 | } |
---|
309 | |
---|
310 | gulong |
---|
311 | html_entity_parse (const gchar *s, guint len) |
---|
312 | { |
---|
313 | static GHashTable *ehash = NULL; |
---|
314 | gchar *t; |
---|
315 | |
---|
316 | if (!ehash) { |
---|
317 | gint i; |
---|
318 | |
---|
319 | ehash = g_hash_table_new (g_str_hash, html_g_str_case_equal); |
---|
320 | |
---|
321 | for (i = 0; i < sizeof (entity_table) / sizeof (entity_table[0]); i++) |
---|
322 | g_hash_table_insert (ehash, (gpointer) entity_table[i].str, GINT_TO_POINTER (entity_table[i].value)); |
---|
323 | } |
---|
324 | |
---|
325 | if (len > 0) { |
---|
326 | t = alloca (len + 1); |
---|
327 | memcpy (t, s, len); |
---|
328 | *(t + len) = '\0'; |
---|
329 | } else { |
---|
330 | t = (gchar *) s; |
---|
331 | } |
---|
332 | |
---|
333 | return GPOINTER_TO_INT (g_hash_table_lookup (ehash, t)); |
---|
334 | } |
---|