1 | /* |
---|
2 | * DOCBparser.c : an attempt to parse SGML Docbook documents |
---|
3 | * |
---|
4 | * This is extremely hackish. It also adds one extension |
---|
5 | * <?sgml-declaration encoding="ISO-8859-1"?> |
---|
6 | * allowing to store the encoding of the document within the instance. |
---|
7 | * |
---|
8 | * See Copyright for the status of this software. |
---|
9 | * |
---|
10 | * daniel@veillard.com |
---|
11 | */ |
---|
12 | |
---|
13 | #include "libxml.h" |
---|
14 | #ifdef LIBXML_DOCB_ENABLED |
---|
15 | |
---|
16 | #include <string.h> |
---|
17 | #ifdef HAVE_CTYPE_H |
---|
18 | #include <ctype.h> |
---|
19 | #endif |
---|
20 | #ifdef HAVE_STDLIB_H |
---|
21 | #include <stdlib.h> |
---|
22 | #endif |
---|
23 | #ifdef HAVE_SYS_STAT_H |
---|
24 | #include <sys/stat.h> |
---|
25 | #endif |
---|
26 | #ifdef HAVE_FCNTL_H |
---|
27 | #include <fcntl.h> |
---|
28 | #endif |
---|
29 | #ifdef HAVE_UNISTD_H |
---|
30 | #include <unistd.h> |
---|
31 | #endif |
---|
32 | #ifdef HAVE_ZLIB_H |
---|
33 | #include <zlib.h> |
---|
34 | #endif |
---|
35 | |
---|
36 | #include <libxml/xmlmemory.h> |
---|
37 | #include <libxml/tree.h> |
---|
38 | #include <libxml/SAX.h> |
---|
39 | #include <libxml/parser.h> |
---|
40 | #include <libxml/parserInternals.h> |
---|
41 | #include <libxml/xmlerror.h> |
---|
42 | #include <libxml/DOCBparser.h> |
---|
43 | #include <libxml/entities.h> |
---|
44 | #include <libxml/encoding.h> |
---|
45 | #include <libxml/valid.h> |
---|
46 | #include <libxml/xmlIO.h> |
---|
47 | #include <libxml/uri.h> |
---|
48 | #include <libxml/globals.h> |
---|
49 | |
---|
50 | /* |
---|
51 | * DocBook XML current versions |
---|
52 | */ |
---|
53 | |
---|
54 | #define XML_DOCBOOK_XML_PUBLIC (const xmlChar *) \ |
---|
55 | "-//OASIS//DTD DocBook XML V4.1.2//EN" |
---|
56 | #define XML_DOCBOOK_XML_SYSTEM (const xmlChar *) \ |
---|
57 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" |
---|
58 | |
---|
/*
 * Record describing a single SGML entity: the character it stands for,
 * the name used to reference it, and a free-form description.
 */
typedef struct _docbEntityDesc {
    int value;          /* UNICODE value of the character */
    const char *name;   /* name of the entity */
    const char *desc;   /* textual description */
} docbEntityDesc;
typedef docbEntityDesc *docbEntityDescPtr;
---|
69 | |
---|
70 | #if 0 |
---|
71 | docbElemDescPtr docbTagLookup (const xmlChar *tag); |
---|
72 | docbEntityDescPtr docbEntityLookup(const xmlChar *name); |
---|
73 | docbEntityDescPtr docbEntityValueLookup(int value); |
---|
74 | |
---|
75 | int docbIsAutoClosed(docbDocPtr doc, |
---|
76 | docbNodePtr elem); |
---|
77 | int docbAutoCloseTag(docbDocPtr doc, |
---|
78 | const xmlChar *name, |
---|
79 | docbNodePtr elem); |
---|
80 | |
---|
81 | #endif |
---|
82 | static int docbParseCharRef(docbParserCtxtPtr ctxt); |
---|
83 | static xmlEntityPtr docbParseEntityRef(docbParserCtxtPtr ctxt, |
---|
84 | xmlChar **str); |
---|
85 | static void docbParseElement(docbParserCtxtPtr ctxt); |
---|
86 | static void docbParseContent(docbParserCtxtPtr ctxt); |
---|
87 | |
---|
/*
 * Record describing a single SGML element; one such record is stored
 * per entry of the element table defined later in this file.
 */
typedef struct _docbElemDesc {
    const char *name;   /* tag name */
    int startTag;       /* non-zero when the start tag can be implied */
    int endTag;         /* non-zero when the end tag can be implied;
                         * the element table comment also uses 2 for
                         * "forbidden (empty elements)" */
    int empty;          /* non-zero for an empty element */
    int depr;           /* deprecated-element marker.
                         * NOTE(review): the element table in this file
                         * stores codes 0-9 in this column, so it appears
                         * to carry more than a flag -- confirm meaning */
    int dtd;            /* 1: only in Loose DTD, 2: only Frameset one */
    const char *desc;   /* textual description */
} docbElemDesc;
typedef docbElemDesc *docbElemDescPtr;
---|
102 | |
---|
103 | |
---|
104 | #define DOCB_MAX_NAMELEN 1000 |
---|
105 | #define DOCB_PARSER_BIG_BUFFER_SIZE 1000 |
---|
106 | #define DOCB_PARSER_BUFFER_SIZE 100 |
---|
107 | |
---|
108 | /* #define DEBUG */ |
---|
109 | /* #define DEBUG_PUSH */ |
---|
110 | |
---|
111 | /************************************************************************ |
---|
112 | * * |
---|
113 | * Parser stacks related functions and macros * |
---|
114 | * * |
---|
115 | ************************************************************************/ |
---|
116 | |
---|
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions operating on
 * the stack made of the context fields name##Tab (storage), name##Nr
 * (number of items), name##Max (allocated slots) and name (cached top
 * of the stack):
 *
 *   scope int  docb<name>Push(ctxt, value)
 *       Stores value on top of the stack, growing the table when it is
 *       full. Returns the index at which the value was stored, or 0 when
 *       the table could not be grown (in that case the stack is left
 *       untouched; note that 0 is also the index of the first item).
 *
 *   scope type docb<name>Pop(ctxt)
 *       Removes and returns the top item, or returns 0 when the stack is
 *       empty. The counter is never driven below zero, so a Pop on an
 *       empty stack cannot make a later Push write before the table.
 *
 * The reallocation result is kept in a temporary so that the previous
 * table is neither leaked nor lost on failure, and name##Max is only
 * updated once the larger table really exists.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int docb##name##Push(docbParserCtxtPtr ctxt, type value) {        \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        int docbNewMax = (ctxt->name##Max > 0) ?                        \
                         ctxt->name##Max * 2 : 10;                      \
        type *docbNewTab = (type *) xmlRealloc(ctxt->name##Tab,         \
                     docbNewMax * sizeof(ctxt->name##Tab[0]));          \
        if (docbNewTab == NULL) {                                       \
            xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = docbNewTab;                                   \
        ctxt->name##Max = docbNewMax;                                   \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type docb##name##Pop(docbParserCtxtPtr ctxt) {                    \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
---|
149 | |
---|
150 | /* PUSH_AND_POP(static, xmlNodePtr, node) */ |
---|
151 | PUSH_AND_POP(static, xmlChar*, name) |
---|
152 | |
---|
153 | /* |
---|
154 | * Macros for accessing the content. Those should be used only by the parser, |
---|
155 | * and not exported. |
---|
156 | * |
---|
157 | * Dirty macros, i.e. one need to make assumption on the context to use them |
---|
158 | * |
---|
159 | * CUR_PTR return the current pointer to the xmlChar to be parsed. |
---|
160 | * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled |
---|
161 | * in ISO-Latin or UTF-8, and the current 16 bit value if compiled |
---|
162 | * in UNICODE mode. This should be used internally by the parser |
---|
163 | * only to compare to ASCII values otherwise it would break when |
---|
164 | * running with UTF-8 encoding. |
---|
165 | * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only |
---|
166 | * to compare on ASCII based substring. |
---|
167 | * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR |
---|
168 | * it should be used only to compare on ASCII based substring. |
---|
169 | * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined |
---|
170 | * strings within the parser. |
---|
171 | * |
---|
172 | * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding |
---|
173 | * |
---|
174 | * CURRENT Returns the current char value, with the full decoding of |
---|
175 | * UTF-8 if we are using this mode. It returns an int. |
---|
176 | * NEXT Skip to the next character, this does the proper decoding |
---|
177 | * in UTF-8 mode. It also pop-up unfinished entities on the fly. |
---|
178 | * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly |
---|
179 | */ |
---|
180 | |
---|
/*
 * All the macros below expect a variable named `ctxt' (the parser
 * context) to be in scope at the point of use.
 *
 * Macros expanding to several operations are wrapped in parentheses or
 * in do { } while (0), and their arguments are parenthesized, so that
 * each of them behaves as one expression or one statement wherever it
 * is used.
 */

/* current input byte converted to upper case */
#define UPPER (toupper(*ctxt->input->cur))

/* advance the input by val bytes, keeping ctxt->nbChars in step */
#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val))

/* the val'th byte after the current one */
#define NXT(val) (ctxt->input->cur[(val)])

/* same as NXT(val), converted to upper case */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

/* pointer to the current input byte (usable as an lvalue) */
#define CUR_PTR ctxt->input->cur

#define SHRINK xmlParserInputShrink(ctxt->input)

#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

/* current input byte as an int, without any decoding */
#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS docbSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT (xmlNextChar(ctxt), ctxt->nbChars++)

/* current input byte, or -1 while a token is pending in ctxt->token */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))

/*
 * Step over the current char, which occupies l bytes: updates the
 * line/column counters, clears ctxt->token and counts one char.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++;          \
  } while (0)

/************
    \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);    \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

/* l must be an int lvalue, it receives the length of the char read */
#define CUR_CHAR(l) docbCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append the char v, whose encoded length is l, to buffer b at index i
 * and advance i accordingly.
 */
#define COPY_BUF(l,b,i,v)                                               \
    do {                                                                \
        if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                       \
        else (i) += xmlCopyChar((l), &(b)[(i)], (v));                   \
    } while (0)
---|
229 | |
---|
230 | /** |
---|
231 | * docbCurrentChar: |
---|
232 | * @ctxt: the DocBook SGML parser context |
---|
233 | * @len: pointer to the length of the char read |
---|
234 | * |
---|
235 | * The current char value, if using UTF-8 this may actually span multiple |
---|
236 | * bytes in the input buffer. Implement the end of line normalization: |
---|
237 | * 2.11 End-of-Line Handling |
---|
238 | * If the encoding is unspecified, in the case we find an ISO-Latin-1 |
---|
239 | * char, then the encoding converter is plugged in automatically. |
---|
240 | * |
---|
241 | * Returns the current char value and its length |
---|
242 | */ |
---|
243 | |
---|
244 | static int |
---|
245 | docbCurrentChar(xmlParserCtxtPtr ctxt, int *len) { |
---|
246 | if (ctxt->instate == XML_PARSER_EOF) |
---|
247 | return(0); |
---|
248 | |
---|
249 | if (ctxt->token != 0) { |
---|
250 | *len = 0; |
---|
251 | return(ctxt->token); |
---|
252 | } |
---|
253 | if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { |
---|
254 | /* |
---|
255 | * We are supposed to handle UTF8, check it's valid |
---|
256 | * From rfc2044: encoding of the Unicode values on UTF-8: |
---|
257 | * |
---|
258 | * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
---|
259 | * 0000 0000-0000 007F 0xxxxxxx |
---|
260 | * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
---|
261 | * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
---|
262 | * |
---|
263 | * Check for the 0x110000 limit too |
---|
264 | */ |
---|
265 | const unsigned char *cur = ctxt->input->cur; |
---|
266 | unsigned char c; |
---|
267 | unsigned int val; |
---|
268 | |
---|
269 | c = *cur; |
---|
270 | if (c & 0x80) { |
---|
271 | if (cur[1] == 0) |
---|
272 | xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
---|
273 | if ((cur[1] & 0xc0) != 0x80) |
---|
274 | goto encoding_error; |
---|
275 | if ((c & 0xe0) == 0xe0) { |
---|
276 | |
---|
277 | if (cur[2] == 0) |
---|
278 | xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
---|
279 | if ((cur[2] & 0xc0) != 0x80) |
---|
280 | goto encoding_error; |
---|
281 | if ((c & 0xf0) == 0xf0) { |
---|
282 | if (cur[3] == 0) |
---|
283 | xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
---|
284 | if (((c & 0xf8) != 0xf0) || |
---|
285 | ((cur[3] & 0xc0) != 0x80)) |
---|
286 | goto encoding_error; |
---|
287 | /* 4-byte code */ |
---|
288 | *len = 4; |
---|
289 | val = (cur[0] & 0x7) << 18; |
---|
290 | val |= (cur[1] & 0x3f) << 12; |
---|
291 | val |= (cur[2] & 0x3f) << 6; |
---|
292 | val |= cur[3] & 0x3f; |
---|
293 | } else { |
---|
294 | /* 3-byte code */ |
---|
295 | *len = 3; |
---|
296 | val = (cur[0] & 0xf) << 12; |
---|
297 | val |= (cur[1] & 0x3f) << 6; |
---|
298 | val |= cur[2] & 0x3f; |
---|
299 | } |
---|
300 | } else { |
---|
301 | /* 2-byte code */ |
---|
302 | *len = 2; |
---|
303 | val = (cur[0] & 0x1f) << 6; |
---|
304 | val |= cur[1] & 0x3f; |
---|
305 | } |
---|
306 | if (!IS_CHAR(val)) { |
---|
307 | ctxt->errNo = XML_ERR_INVALID_ENCODING; |
---|
308 | if ((ctxt->sax != NULL) && |
---|
309 | (ctxt->sax->error != NULL)) |
---|
310 | ctxt->sax->error(ctxt->userData, |
---|
311 | "Char 0x%X out of allowed range\n", val); |
---|
312 | ctxt->wellFormed = 0; |
---|
313 | ctxt->disableSAX = 1; |
---|
314 | } |
---|
315 | return(val); |
---|
316 | } else { |
---|
317 | /* 1-byte code */ |
---|
318 | *len = 1; |
---|
319 | return((int) *ctxt->input->cur); |
---|
320 | } |
---|
321 | } |
---|
322 | /* |
---|
323 | * Assume it's a fixed length encoding (1) with |
---|
324 | * a compatible encoding for the ASCII set, since |
---|
325 | * XML constructs only use < 128 chars |
---|
326 | */ |
---|
327 | *len = 1; |
---|
328 | if ((int) *ctxt->input->cur < 0x80) |
---|
329 | return((int) *ctxt->input->cur); |
---|
330 | |
---|
331 | /* |
---|
332 | * Humm this is bad, do an automatic flow conversion |
---|
333 | */ |
---|
334 | xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); |
---|
335 | ctxt->charset = XML_CHAR_ENCODING_UTF8; |
---|
336 | return(xmlCurrentChar(ctxt, len)); |
---|
337 | |
---|
338 | encoding_error: |
---|
339 | /* |
---|
340 | * If we detect an UTF8 error that probably mean that the |
---|
341 | * input encoding didn't get properly advertized in the |
---|
342 | * declaration header. Report the error and switch the encoding |
---|
343 | * to ISO-Latin-1 (if you don't like this policy, just declare the |
---|
344 | * encoding !) |
---|
345 | */ |
---|
346 | ctxt->errNo = XML_ERR_INVALID_ENCODING; |
---|
347 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { |
---|
348 | ctxt->sax->error(ctxt->userData, |
---|
349 | "Input is not proper UTF-8, indicate encoding !\n"); |
---|
350 | ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", |
---|
351 | ctxt->input->cur[0], ctxt->input->cur[1], |
---|
352 | ctxt->input->cur[2], ctxt->input->cur[3]); |
---|
353 | } |
---|
354 | |
---|
355 | ctxt->charset = XML_CHAR_ENCODING_8859_1; |
---|
356 | *len = 1; |
---|
357 | return((int) *ctxt->input->cur); |
---|
358 | } |
---|
359 | |
---|
#if 0
/**
 * sgmlNextChar:
 * @ctxt:  the DocBook SGML parser context
 *
 * Move the input one byte forward, maintaining the line, column and
 * char counters. When the current input has no more data it is popped
 * instead. (Disabled code, kept under #if 0.)
 */

static void
sgmlNextChar(docbParserCtxtPtr ctxt) {
    if (ctxt->instate == XML_PARSER_EOF)
        return;

    /* current input finished and cannot be refilled: drop it */
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
        xmlPopInput(ctxt);
        return;
    }

    if (*(ctxt->input->cur) != '\n') {
        ctxt->input->col++;
    } else {
        ctxt->input->line++;
        ctxt->input->col = 1;
    }
    ctxt->input->cur++;
    ctxt->nbChars++;

    /* end of the buffered data reached: ask for more */
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
#endif
---|
386 | |
---|
387 | /** |
---|
388 | * docbSkipBlankChars: |
---|
389 | * @ctxt: the DocBook SGML parser context |
---|
390 | * |
---|
391 | * skip all blanks character found at that point in the input streams. |
---|
392 | * |
---|
393 | * Returns the number of space chars skipped |
---|
394 | */ |
---|
395 | |
---|
396 | static int |
---|
397 | docbSkipBlankChars(xmlParserCtxtPtr ctxt) { |
---|
398 | int res = 0; |
---|
399 | |
---|
400 | while (IS_BLANK(*(ctxt->input->cur))) { |
---|
401 | if ((*ctxt->input->cur == 0) && |
---|
402 | (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { |
---|
403 | xmlPopInput(ctxt); |
---|
404 | } else { |
---|
405 | if (*(ctxt->input->cur) == '\n') { |
---|
406 | ctxt->input->line++; ctxt->input->col = 1; |
---|
407 | } else ctxt->input->col++; |
---|
408 | ctxt->input->cur++; |
---|
409 | ctxt->nbChars++; |
---|
410 | if (*ctxt->input->cur == 0) |
---|
411 | xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
---|
412 | } |
---|
413 | res++; |
---|
414 | } |
---|
415 | return(res); |
---|
416 | } |
---|
417 | |
---|
418 | |
---|
419 | |
---|
420 | /************************************************************************ |
---|
421 | * * |
---|
422 | * The list of SGML elements and their properties * |
---|
423 | * * |
---|
424 | ************************************************************************/ |
---|
425 | |
---|
426 | /* |
---|
427 | * Start Tag: 1 means the start tag can be omitted |
---|
428 | * End Tag: 1 means the end tag can be omitted |
---|
429 | * 2 means it's forbidden (empty elements) |
---|
430 | * Depr: this element is deprecated |
---|
431 | * DTD: 1 means that this element is valid only in the Loose DTD |
---|
432 | * 2 means that this element is valid only in the Frameset DTD |
---|
433 | * |
---|
434 | * Name,Start Tag,End Tag, Empty, Depr., DTD, Description |
---|
435 | */ |
---|
436 | static docbElemDesc |
---|
437 | docbookElementTable[] = { |
---|
438 | { "abbrev", 0, 0, 0, 3, 0, "" }, /* word */ |
---|
439 | { "abstract", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
440 | { "accel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
441 | { "ackno", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
442 | { "acronym", 0, 0, 0, 3, 0, "" }, /* word */ |
---|
443 | { "action", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
444 | { "address", 0, 0, 0, 1, 0, "" }, |
---|
445 | { "affiliation",0, 0, 0, 9, 0, "" }, /* shortaffil */ |
---|
446 | { "alt", 0, 0, 0, 1, 0, "" }, |
---|
447 | { "anchor", 0, 2, 1, 0, 0, "" }, |
---|
448 | { "answer", 0, 0, 0, 9, 0, "" }, /* label */ |
---|
449 | { "appendix", 0, 0, 0, 9, 0, "" }, /* appendixinfo */ |
---|
450 | { "appendixinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
451 | { "application",0, 0, 0, 2, 0, "" }, /* para */ |
---|
452 | { "area", 0, 2, 1, 0, 0, "" }, |
---|
453 | { "areaset", 0, 0, 0, 9, 0, "" }, /* area */ |
---|
454 | { "areaspec", 0, 0, 0, 9, 0, "" }, /* area */ |
---|
455 | { "arg", 0, 0, 0, 1, 0, "" }, |
---|
456 | { "artheader", 0, 0, 0, 9, 0, "" }, |
---|
457 | { "article", 0, 0, 0, 9, 0, "" }, /* div.title.content */ |
---|
458 | { "articleinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
459 | { "artpagenums",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
460 | { "attribution",0, 0, 0, 2, 0, "" }, /* para */ |
---|
461 | { "audiodata", 0, 2, 1, 0, 0, "" }, |
---|
462 | { "audioobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ |
---|
463 | { "authorblurb",0, 0, 0, 9, 0, "" }, /* title */ |
---|
464 | { "authorgroup",0, 0, 0, 9, 0, "" }, /* author */ |
---|
465 | { "authorinitials",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
466 | { "author", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */ |
---|
467 | { "beginpage", 0, 2, 1, 0, 0, "" }, |
---|
468 | { "bibliodiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ |
---|
469 | { "biblioentry",0, 0, 0, 9, 0, "" }, /* articleinfo */ |
---|
470 | { "bibliography",0, 0, 0, 9, 0, "" }, /* bibliographyinfo */ |
---|
471 | { "bibliographyinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
472 | { "bibliomisc", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
473 | { "bibliomixed",0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix, bibliomset) */ |
---|
474 | { "bibliomset", 0, 0, 0, 1, 0, "" }, /* %bibliocomponent.mix; | bibliomset) */ |
---|
475 | { "biblioset", 0, 0, 0, 9, 0, "" }, /* bibliocomponent.mix */ |
---|
476 | { "blockquote", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
477 | { "book", 0, 0, 0, 9, 0, "" }, /* div.title.content */ |
---|
478 | { "bookinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
479 | { "bridgehead", 0, 0, 0, 8, 0, "" }, /* title */ |
---|
480 | { "callout", 0, 0, 0, 9, 0, "" }, /* component.mix */ |
---|
481 | { "calloutlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
482 | { "caption", 0, 0, 0, 9, 0, "" }, /* textobject.mix */ |
---|
483 | { "caution", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
484 | { "chapter", 0, 0, 0, 9, 0, "" }, /* chapterinfo */ |
---|
485 | { "chapterinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
486 | { "citation", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
487 | { "citerefentry",0, 0, 0, 9, 0, "" }, /* refentrytitle */ |
---|
488 | { "citetitle", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
489 | { "city", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
490 | { "classname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
491 | { "classsynopsisinfo",0,0, 0, 9, 0, "" }, /* cptr */ |
---|
492 | { "classsynopsis",0, 0, 0, 9, 0, "" }, /* ooclass */ |
---|
493 | { "cmdsynopsis",0, 0, 0, 9, 0, "" }, /* command */ |
---|
494 | { "co", 0, 2, 1, 0, 0, "" }, |
---|
495 | { "collab", 0, 0, 0, 9, 0, "" }, /* collabname */ |
---|
496 | { "collabname", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
497 | { "colophon", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ |
---|
498 | { "colspec", 0, 2, 1, 0, 0, "" }, |
---|
499 | { "colspec", 0, 2, 1, 0, 0, "" }, |
---|
500 | { "command", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
501 | { "computeroutput",0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
502 | { "confdates", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
503 | { "confgroup", 0, 0, 0, 9, 0, "" }, /* confdates */ |
---|
504 | { "confnum", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
505 | { "confsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
506 | { "conftitle", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
507 | { "constant", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
508 | { "constructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */ |
---|
509 | { "contractnum",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
510 | { "contractsponsor",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
511 | { "contrib", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
512 | { "copyright", 0, 0, 0, 9, 0, "" }, /* year */ |
---|
513 | { "corpauthor", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
514 | { "corpname", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
515 | { "country", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
516 | { "database", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
517 | { "date", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
518 | { "dedication", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ |
---|
519 | { "destructorsynopsis",0,0, 0, 9, 0, "" }, /* modifier */ |
---|
520 | { "docinfo", 0, 0, 0, 9, 0, "" }, |
---|
521 | { "edition", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
522 | { "editor", 0, 0, 0, 9, 0, "" }, /* person.ident.mix */ |
---|
523 | { "email", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
524 | { "emphasis", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
525 | { "entry", 0, 0, 0, 9, 0, "" }, /* tbl.entry.mdl */ |
---|
526 | { "entrytbl", 0, 0, 0, 9, 0, "" }, /* tbl.entrytbl.mdl */ |
---|
527 | { "envar", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
528 | { "epigraph", 0, 0, 0, 9, 0, "" }, /* attribution */ |
---|
529 | { "equation", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
530 | { "errorcode", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
531 | { "errorname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
532 | { "errortype", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
533 | { "example", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
534 | { "exceptionname",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
535 | { "fax", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
536 | { "fieldsynopsis", 0, 0, 0, 9, 0, "" }, /* modifier */ |
---|
537 | { "figure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
538 | { "filename", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
539 | { "firstname", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
540 | { "firstterm", 0, 0, 0, 3, 0, "" }, /* word */ |
---|
541 | { "footnote", 0, 0, 0, 9, 0, "" }, /* footnote.mix */ |
---|
542 | { "footnoteref",0, 2, 1, 0, 0, "" }, |
---|
543 | { "foreignphrase",0, 0, 0, 2, 0, "" }, /* para */ |
---|
544 | { "formalpara", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
545 | { "funcdef", 0, 0, 0, 1, 0, "" }, |
---|
546 | { "funcparams", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
547 | { "funcprototype",0, 0, 0, 9, 0, "" }, /* funcdef */ |
---|
548 | { "funcsynopsis",0, 0, 0, 9, 0, "" }, /* funcsynopsisinfo */ |
---|
549 | { "funcsynopsisinfo", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
550 | { "function", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
551 | { "glossary", 0, 0, 0, 9, 0, "" }, /* glossaryinfo */ |
---|
552 | { "glossaryinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
553 | { "glossdef", 0, 0, 0, 9, 0, "" }, /* glossdef.mix */ |
---|
554 | { "glossdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ |
---|
555 | { "glossentry", 0, 0, 0, 9, 0, "" }, /* glossterm */ |
---|
556 | { "glosslist", 0, 0, 0, 9, 0, "" }, /* glossentry */ |
---|
557 | { "glossseealso",0, 0, 1, 2, 0, "" }, /* para */ |
---|
558 | { "glosssee", 0, 0, 1, 2, 0, "" }, /* para */ |
---|
559 | { "glossterm", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
560 | { "graphic", 0, 0, 0, 9, 0, "" }, |
---|
561 | { "graphicco", 0, 0, 0, 9, 0, "" }, /* areaspec */ |
---|
562 | { "group", 0, 0, 0, 9, 0, "" }, /* arg */ |
---|
563 | { "guibutton", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
564 | { "guiicon", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
565 | { "guilabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
566 | { "guimenuitem",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
567 | { "guimenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
568 | { "guisubmenu", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
569 | { "hardware", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
570 | { "highlights", 0, 0, 0, 9, 0, "" }, /* highlights.mix */ |
---|
571 | { "holder", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
572 | { "honorific", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
573 | { "imagedata", 0, 2, 1, 0, 0, "" }, |
---|
574 | { "imageobjectco",0, 0, 0, 9, 0, "" }, /* areaspec */ |
---|
575 | { "imageobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ |
---|
576 | { "important", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
577 | { "indexdiv", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ |
---|
578 | { "indexentry", 0, 0, 0, 9, 0, "" }, /* primaryie */ |
---|
579 | { "index", 0, 0, 0, 9, 0, "" }, /* indexinfo */ |
---|
580 | { "indexinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
581 | { "indexterm", 0, 0, 0, 9, 0, "" }, /* primary */ |
---|
582 | { "informalequation",0, 0, 0, 9, 0, "" }, /* equation.content */ |
---|
583 | { "informalexample",0, 0, 0, 9, 0, "" }, /* example.mix */ |
---|
584 | { "informalfigure",0, 0, 0, 9, 0, "" }, /* figure.mix */ |
---|
585 | { "informaltable",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
586 | { "initializer",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
587 | { "inlineequation",0, 0, 0, 9, 0, "" }, /* inlineequation.content */ |
---|
588 | { "inlinegraphic",0, 0, 0, 9, 0, "" }, |
---|
589 | { "inlinemediaobject",0,0, 0, 9, 0, "" }, /* objectinfo */ |
---|
590 | { "interfacename",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
591 | { "interface", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
592 | { "invpartnumber",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
593 | { "isbn", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
594 | { "issn", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
595 | { "issuenum", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
596 | { "itemizedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
597 | { "itermset", 0, 0, 0, 9, 0, "" }, /* indexterm */ |
---|
598 | { "jobtitle", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
599 | { "keycap", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
600 | { "keycode", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
601 | { "keycombo", 0, 0, 0, 9, 0, "" }, /* keycap */ |
---|
602 | { "keysym", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
603 | { "keyword", 0, 0, 0, 1, 0, "" }, |
---|
604 | { "keywordset", 0, 0, 0, 9, 0, "" }, /* keyword */ |
---|
605 | { "label", 0, 0, 0, 3, 0, "" }, /* word */ |
---|
606 | { "legalnotice",0, 0, 0, 9, 0, "" }, /* title */ |
---|
607 | { "lineage", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
608 | { "lineannotation",0, 0, 0, 2, 0, "" }, /* para */ |
---|
609 | { "link", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
610 | { "listitem", 0, 0, 0, 9, 0, "" }, /* component.mix */ |
---|
611 | { "literal", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
612 | { "literallayout",0, 0, 0, 2, 0, "" }, /* para */ |
---|
613 | { "lot", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */ |
---|
614 | { "lotentry", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
615 | { "manvolnum", 0, 0, 0, 3, 0, "" }, /* word */ |
---|
616 | { "markup", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
617 | { "medialabel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
618 | { "mediaobjectco",0, 0, 0, 9, 0, "" }, /* objectinfo */ |
---|
619 | { "mediaobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ |
---|
620 | { "member", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
621 | { "menuchoice", 0, 0, 0, 9, 0, "" }, /* shortcut */ |
---|
622 | { "methodname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
623 | { "methodparam",0, 0, 0, 9, 0, "" }, /* modifier */ |
---|
624 | { "methodsynopsis",0, 0, 0, 9, 0, "" }, /* modifier */ |
---|
625 | { "modespec", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
626 | { "modifier", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
627 | { "mousebutton",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
628 | { "msgaud", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
629 | { "msgentry", 0, 0, 0, 9, 0, "" }, /* msg */ |
---|
630 | { "msgexplan", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
631 | { "msginfo", 0, 0, 0, 9, 0, "" }, /* msglevel */ |
---|
632 | { "msglevel", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
633 | { "msgmain", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
634 | { "msgorig", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
635 | { "msgrel", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
636 | { "msgset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
637 | { "msgsub", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
638 | { "msgtext", 0, 0, 0, 9, 0, "" }, /* component.mix */ |
---|
639 | { "msg", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
640 | { "note", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
641 | { "objectinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
642 | { "olink", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
643 | { "ooclass", 0, 0, 0, 9, 0, "" }, /* modifier */ |
---|
644 | { "ooexception",0, 0, 0, 9, 0, "" }, /* modifier */ |
---|
645 | { "oointerface",0, 0, 0, 9, 0, "" }, /* modifier */ |
---|
646 | { "optional", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
647 | { "option", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
648 | { "orderedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
649 | { "orgdiv", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
650 | { "orgname", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
651 | { "otheraddr", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
652 | { "othercredit",0, 0, 0, 9, 0, "" }, /* person.ident.mix */ |
---|
653 | { "othername", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
654 | { "pagenums", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
655 | { "paramdef", 0, 0, 0, 1, 0, "" }, |
---|
656 | { "parameter", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
657 | { "para", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
658 | { "partinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
659 | { "partintro", 0, 0, 0, 9, 0, "" }, /* div.title.content */ |
---|
660 | { "part", 0, 0, 0, 9, 0, "" }, /* partinfo */ |
---|
661 | { "phone", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
662 | { "phrase", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
663 | { "pob", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
664 | { "postcode", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
665 | { "prefaceinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
666 | { "preface", 0, 0, 0, 9, 0, "" }, /* prefaceinfo */ |
---|
667 | { "primaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
668 | { "primary", 0, 0, 0, 9, 0, "" }, /* ndxterm */ |
---|
669 | { "printhistory",0, 0, 0, 9, 0, "" }, /* para.class */ |
---|
670 | { "procedure", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
671 | { "productname",0, 0, 0, 2, 0, "" }, /* para */ |
---|
672 | { "productnumber",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
673 | { "programlistingco",0, 0, 0, 9, 0, "" }, /* areaspec */ |
---|
674 | { "programlisting",0, 0, 0, 2, 0, "" }, /* para */ |
---|
675 | { "prompt", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
676 | { "property", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
677 | { "pubdate", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
678 | { "publishername",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
679 | { "publisher", 0, 0, 0, 9, 0, "" }, /* publishername */ |
---|
680 | { "pubsnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
681 | { "qandadiv", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
682 | { "qandaentry", 0, 0, 0, 9, 0, "" }, /* revhistory */ |
---|
683 | { "qandaset", 0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
684 | { "question", 0, 0, 0, 9, 0, "" }, /* label */ |
---|
685 | { "quote", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
686 | { "refclass", 0, 0, 0, 9, 0, "" }, /* refclass.char.mix */ |
---|
687 | { "refdescriptor",0, 0, 0, 9, 0, "" }, /* refname.char.mix */ |
---|
688 | { "refentryinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
689 | { "refentry", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */ |
---|
690 | { "refentrytitle",0, 0, 0, 2, 0, "" }, /* para */ |
---|
691 | { "referenceinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
692 | { "reference", 0, 0, 0, 9, 0, "" }, /* referenceinfo */ |
---|
693 | { "refmeta", 0, 0, 0, 9, 0, "" }, /* ndxterm.class */ |
---|
694 | { "refmiscinfo",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
695 | { "refnamediv", 0, 0, 0, 9, 0, "" }, /* refdescriptor */ |
---|
696 | { "refname", 0, 0, 0, 9, 0, "" }, /* refname.char.mix */ |
---|
697 | { "refpurpose", 0, 0, 0, 9, 0, "" }, /* refinline.char.mix */ |
---|
698 | { "refsect1info",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
699 | { "refsect1", 0, 0, 0, 9, 0, "" }, /* refsect */ |
---|
700 | { "refsect2info",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
701 | { "refsect2", 0, 0, 0, 9, 0, "" }, /* refsect */ |
---|
702 | { "refsect3info",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
703 | { "refsect3", 0, 0, 0, 9, 0, "" }, /* refsect */ |
---|
704 | { "refsynopsisdivinfo",0,0, 0, 9, 0, "" }, /* graphic */ |
---|
705 | { "refsynopsisdiv",0, 0, 0, 9, 0, "" }, /* refsynopsisdivinfo */ |
---|
706 | { "releaseinfo",0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
707 | { "remark", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
708 | { "replaceable",0, 0, 0, 1, 0, "" }, |
---|
709 | { "returnvalue",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
710 | { "revdescription",0, 0, 0, 9, 0, "" }, /* revdescription.mix */ |
---|
711 | { "revhistory", 0, 0, 0, 9, 0, "" }, /* revision */ |
---|
712 | { "revision", 0, 0, 0, 9, 0, "" }, /* revnumber */ |
---|
713 | { "revnumber", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
714 | { "revremark", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
715 | { "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */ |
---|
716 | { "row", 0, 0, 0, 9, 0, "" }, /* tbl.row.mdl */ |
---|
717 | { "sbr", 0, 2, 1, 0, 0, "" }, |
---|
718 | { "screenco", 0, 0, 0, 9, 0, "" }, /* areaspec */ |
---|
719 | { "screeninfo", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
720 | { "screen", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
721 | { "screenshot", 0, 0, 0, 9, 0, "" }, /* screeninfo */ |
---|
722 | { "secondaryie",0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
723 | { "secondary", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
724 | { "sect1info", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
725 | { "sect1", 0, 0, 0, 9, 0, "" }, /* sect */ |
---|
726 | { "sect2info", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
727 | { "sect2", 0, 0, 0, 9, 0, "" }, /* sect */ |
---|
728 | { "sect3info", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
729 | { "sect3", 0, 0, 0, 9, 0, "" }, /* sect */ |
---|
730 | { "sect4info", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
731 | { "sect4", 0, 0, 0, 9, 0, "" }, /* sect */ |
---|
732 | { "sect5info", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
733 | { "sect5", 0, 0, 0, 9, 0, "" }, /* sect */ |
---|
734 | { "sectioninfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
735 | { "section", 0, 0, 0, 9, 0, "" }, /* sectioninfo */ |
---|
736 | { "seealsoie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
737 | { "seealso", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
738 | { "seeie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
739 | { "see", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
740 | { "seglistitem",0, 0, 0, 9, 0, "" }, /* seg */ |
---|
741 | { "segmentedlist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
742 | { "seg", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
743 | { "segtitle", 0, 0, 0, 8, 0, "" }, /* title */ |
---|
744 | { "seriesvolnums", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
745 | { "set", 0, 0, 0, 9, 0, "" }, /* div.title.content */ |
---|
746 | { "setindexinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
747 | { "setindex", 0, 0, 0, 9, 0, "" }, /* setindexinfo */ |
---|
748 | { "setinfo", 0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
749 | { "sgmltag", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
750 | { "shortaffil", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
751 | { "shortcut", 0, 0, 0, 9, 0, "" }, /* keycap */ |
---|
752 | { "sidebarinfo",0, 0, 0, 9, 0, "" }, /* graphic */ |
---|
753 | { "sidebar", 0, 0, 0, 9, 0, "" }, /* sidebarinfo */ |
---|
754 | { "simpara", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
755 | { "simplelist", 0, 0, 0, 9, 0, "" }, /* member */ |
---|
756 | { "simplemsgentry", 0, 0, 0, 9, 0, "" }, /* msgtext */ |
---|
757 | { "simplesect", 0, 0, 0, 9, 0, "" }, /* sect.title.content */ |
---|
758 | { "spanspec", 0, 2, 1, 0, 0, "" }, |
---|
759 | { "state", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
760 | { "step", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
761 | { "street", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
762 | { "structfield",0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
763 | { "structname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
764 | { "subjectset", 0, 0, 0, 9, 0, "" }, /* subject */ |
---|
765 | { "subject", 0, 0, 0, 9, 0, "" }, /* subjectterm */ |
---|
766 | { "subjectterm",0, 0, 0, 1, 0, "" }, |
---|
767 | { "subscript", 0, 0, 0, 1, 0, "" }, |
---|
768 | { "substeps", 0, 0, 0, 9, 0, "" }, /* step */ |
---|
769 | { "subtitle", 0, 0, 0, 8, 0, "" }, /* title */ |
---|
770 | { "superscript", 0, 0, 0, 1, 0, "" }, |
---|
771 | { "surname", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
772 | { "symbol", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
773 | { "synopfragment", 0, 0, 0, 9, 0, "" }, /* arg */ |
---|
774 | { "synopfragmentref", 0, 0, 0, 1, 0, "" }, |
---|
775 | { "synopsis", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
776 | { "systemitem", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
777 | { "table", 0, 0, 0, 9, 0, "" }, /* tbl.table.mdl */ |
---|
778 | /* { "%tbl.table.name;", 0, 0, 0, 9, 0, "" },*/ /* tbl.table.mdl */ |
---|
779 | { "tbody", 0, 0, 0, 9, 0, "" }, /* row */ |
---|
780 | { "tbody", 0, 0, 0, 9, 0, "" }, /* row */ |
---|
781 | { "term", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
782 | { "tertiaryie", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
783 | { "tertiary ", 0, 0, 0, 4, 0, "" }, /* ndxterm */ |
---|
784 | { "textobject", 0, 0, 0, 9, 0, "" }, /* objectinfo */ |
---|
785 | { "tfoot", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */ |
---|
786 | { "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */ |
---|
787 | { "tgroup", 0, 0, 0, 9, 0, "" }, /* tbl.tgroup.mdl */ |
---|
788 | { "thead", 0, 0, 0, 9, 0, "" }, /* row */ |
---|
789 | { "thead", 0, 0, 0, 9, 0, "" }, /* tbl.hdft.mdl */ |
---|
790 | { "tip", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
791 | { "titleabbrev",0, 0, 0, 8, 0, "" }, /* title */ |
---|
792 | { "title", 0, 0, 0, 8, 0, "" }, /* title */ |
---|
793 | { "tocback", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
794 | { "toc", 0, 0, 0, 9, 0, "" }, /* bookcomponent.title.content */ |
---|
795 | { "tocchap", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
796 | { "tocentry", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
797 | { "tocfront", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
798 | { "toclevel1", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
799 | { "toclevel2", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
800 | { "toclevel3", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
801 | { "toclevel4", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
802 | { "toclevel5", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
803 | { "tocpart", 0, 0, 0, 9, 0, "" }, /* tocentry */ |
---|
804 | { "token", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
805 | { "trademark", 0, 0, 0, 1, 0, "" }, |
---|
806 | { "type", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
807 | { "ulink", 0, 0, 0, 2, 0, "" }, /* para */ |
---|
808 | { "userinput", 0, 0, 0, 9, 0, "" }, /* cptr */ |
---|
809 | { "varargs", 0, 2, 1, 0, 0, "" }, |
---|
810 | { "variablelist",0, 0, 0, 9, 0, "" }, /* formalobject.title.content */ |
---|
811 | { "varlistentry",0, 0, 0, 9, 0, "" }, /* term */ |
---|
812 | { "varname", 0, 0, 0, 7, 0, "" }, /* smallcptr */ |
---|
813 | { "videodata", 0, 2, 1, 0, 0, "" }, |
---|
814 | { "videoobject",0, 0, 0, 9, 0, "" }, /* objectinfo */ |
---|
815 | { "void", 0, 2, 1, 0, 0, "" }, |
---|
816 | { "volumenum", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
817 | { "warning", 0, 0, 0, 9, 0, "" }, /* title */ |
---|
818 | { "wordasword", 0, 0, 0, 3, 0, "" }, /* word */ |
---|
819 | { "xref", 0, 2, 1, 0, 0, "" }, |
---|
820 | { "year", 0, 0, 0, 4, 0, "" }, /* docinfo */ |
---|
821 | }; |
---|
822 | |
---|
#if 0
/*
 * Disabled table of "equivalent end" groups.
 *
 * Each NULL-terminated row is one group: a start tag listed in a row implies
 * the end of the current element when that element's type is listed in the
 * same row.  The final lone NULL ends the table.
 *
 * NOTE(review): the names below look like HTML element names rather than
 * DocBook ones -- confirm before ever enabling this block.
 */
static const char *docbEquEnd[] = {
    "dt", "dd", "li", "option",
    NULL,
    "h1", "h2", "h3", "h4", "h5", "h6",
    NULL,
    "ol", "menu", "dir", "address", "pre", "listing", "xmp",
    NULL,
    NULL
};
#endif
---|
836 | |
---|
837 | /* |
---|
838 | * According to the SGML DTD, HR should be added to the 2nd line above, as it |
---|
839 | * is not allowed within a H1, H2, H3, etc. But we should tolerate that case |
---|
840 | * because many documents contain rules in headings... |
---|
841 | */ |
---|
842 | |
---|
/*
 * Start tags that imply the end of the current element.
 *
 * Layout consumed by docbInitAutoClose() and docbCheckAutoClose(): a sequence
 * of NULL-terminated groups.  The first string of a group is the tag being
 * opened, the remaining strings of that group are the tags it closes, and
 * one additional NULL after the last group terminates the table.
 * The table currently holds no group at all.
 */
static const char *docbStartClose[] = {
    NULL
};

/*
 * Index into docbStartClose: slot n points at the first string of group n,
 * unused slots are NULL.  Built by docbInitAutoClose().
 */
static const char **docbStartCloseIndex[100];

/* 0 until docbInitAutoClose() has built docbStartCloseIndex, 1 afterwards. */
static int docbStartCloseIndexinitialized = 0;

/************************************************************************
 *									*
 *		functions to handle SGML specific data			*
 *									*
 ************************************************************************/

/**
 * docbInitAutoClose:
 *
 * Initialize the docbStartCloseIndex for fast lookup of closing tags names.
 *
 * Every slot is first reset to NULL, then one slot per group found in
 * docbStartClose is filled in.  The last slot is never filled, so the index
 * always ends with a NULL entry.
 *
 * The function records that it ran in docbStartCloseIndexinitialized, so
 * any later call returns immediately instead of rebuilding the index.
 */
static void
docbInitAutoClose(void) {
    const int slots = (int) (sizeof(docbStartCloseIndex) /
                             sizeof(docbStartCloseIndex[0]));
    int indx, i = 0;

    if (docbStartCloseIndexinitialized) return;

    for (indx = 0; indx < slots; indx++) docbStartCloseIndex[indx] = NULL;

    indx = 0;
    while ((docbStartClose[i] != NULL) && (indx < slots - 1)) {
        /* remember where this group starts ... */
        docbStartCloseIndex[indx++] = &docbStartClose[i];
        /* ... then skip to its NULL terminator and step over it */
        while (docbStartClose[i] != NULL) i++;
        i++;
    }

    /*
     * Mark the index as built.  Without this the guard at the top never
     * fires and callers testing the flag rebuild the index on every call.
     */
    docbStartCloseIndexinitialized = 1;
}
---|
879 | |
---|
880 | /** |
---|
881 | * docbTagLookup: |
---|
882 | * @tag: The tag name |
---|
883 | * |
---|
884 | * Lookup the SGML tag in the ElementTable |
---|
885 | * |
---|
886 | * Returns the related docbElemDescPtr or NULL if not found. |
---|
887 | */ |
---|
888 | static docbElemDescPtr |
---|
889 | docbTagLookup(const xmlChar *tag) { |
---|
890 | unsigned int i; |
---|
891 | |
---|
892 | for (i = 0; i < (sizeof(docbookElementTable) / |
---|
893 | sizeof(docbookElementTable[0]));i++) { |
---|
894 | if (xmlStrEqual(tag, BAD_CAST docbookElementTable[i].name)) |
---|
895 | return(&docbookElementTable[i]); |
---|
896 | } |
---|
897 | return(NULL); |
---|
898 | } |
---|
899 | |
---|
900 | /** |
---|
901 | * docbCheckAutoClose: |
---|
902 | * @newtag: The new tag name |
---|
903 | * @oldtag: The old tag name |
---|
904 | * |
---|
905 | * Checks whether the new tag is one of the registered valid tags for |
---|
906 | * closing old. |
---|
907 | * Initialize the docbStartCloseIndex for fast lookup of closing tags names. |
---|
908 | * |
---|
909 | * Returns 0 if no, 1 if yes. |
---|
910 | */ |
---|
911 | static int |
---|
912 | docbCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) { |
---|
913 | int i, indx; |
---|
914 | const char **closed = NULL; |
---|
915 | |
---|
916 | if (docbStartCloseIndexinitialized == 0) docbInitAutoClose(); |
---|
917 | |
---|
918 | /* inefficient, but not a big deal */ |
---|
919 | for (indx = 0; indx < 100;indx++) { |
---|
920 | closed = docbStartCloseIndex[indx]; |
---|
921 | if (closed == NULL) return(0); |
---|
922 | if (xmlStrEqual(BAD_CAST *closed, newtag)) break; |
---|
923 | } |
---|
924 | |
---|
925 | i = closed - docbStartClose; |
---|
926 | i++; |
---|
927 | while (docbStartClose[i] != NULL) { |
---|
928 | if (xmlStrEqual(BAD_CAST docbStartClose[i], oldtag)) { |
---|
929 | return(1); |
---|
930 | } |
---|
931 | i++; |
---|
932 | } |
---|
933 | return(0); |
---|
934 | } |
---|
935 | |
---|
936 | /** |
---|
937 | * docbAutoCloseOnClose: |
---|
938 | * @ctxt: an SGML parser context |
---|
939 | * @newtag: The new tag name |
---|
940 | * |
---|
941 | * The DocBook DTD allows an ending tag to implicitly close other tags. |
---|
942 | */ |
---|
943 | static void |
---|
944 | docbAutoCloseOnClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) { |
---|
945 | docbElemDescPtr info; |
---|
946 | xmlChar *oldname; |
---|
947 | int i; |
---|
948 | |
---|
949 | if ((newtag[0] == '/') && (newtag[1] == 0)) |
---|
950 | return; |
---|
951 | |
---|
952 | #ifdef DEBUG |
---|
953 | xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr); |
---|
954 | for (i = 0;i < ctxt->nameNr;i++) |
---|
955 | xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]); |
---|
956 | #endif |
---|
957 | |
---|
958 | for (i = (ctxt->nameNr - 1);i >= 0;i--) { |
---|
959 | if (xmlStrEqual(newtag, ctxt->nameTab[i])) break; |
---|
960 | } |
---|
961 | if (i < 0) return; |
---|
962 | |
---|
963 | while (!xmlStrEqual(newtag, ctxt->name)) { |
---|
964 | info = docbTagLookup(ctxt->name); |
---|
965 | if ((info == NULL) || (info->endTag == 1)) { |
---|
966 | #ifdef DEBUG |
---|
967 | xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name); |
---|
968 | #endif |
---|
969 | } else { |
---|
970 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
971 | ctxt->sax->error(ctxt->userData, |
---|
972 | "Opening and ending tag mismatch: %s and %s\n", |
---|
973 | newtag, ctxt->name); |
---|
974 | ctxt->wellFormed = 0; |
---|
975 | } |
---|
976 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
977 | ctxt->sax->endElement(ctxt->userData, ctxt->name); |
---|
978 | oldname = docbnamePop(ctxt); |
---|
979 | if (oldname != NULL) { |
---|
980 | #ifdef DEBUG |
---|
981 | xmlGenericError(xmlGenericErrorContext,"docbAutoCloseOnClose: popped %s\n", oldname); |
---|
982 | #endif |
---|
983 | xmlFree(oldname); |
---|
984 | } |
---|
985 | } |
---|
986 | } |
---|
987 | |
---|
988 | /** |
---|
989 | * docbAutoClose: |
---|
990 | * @ctxt: an SGML parser context |
---|
991 | * @newtag: The new tag name or NULL |
---|
992 | * |
---|
993 | * The DocBook DTD allows a tag to implicitly close other tags. |
---|
994 | * The list is kept in docbStartClose array. This function is |
---|
995 | * called when a new tag has been detected and generates the |
---|
996 | * appropriates closes if possible/needed. |
---|
997 | * If newtag is NULL this mean we are at the end of the resource |
---|
998 | * and we should check |
---|
999 | */ |
---|
1000 | static void |
---|
1001 | docbAutoClose(docbParserCtxtPtr ctxt, const xmlChar *newtag) { |
---|
1002 | xmlChar *oldname; |
---|
1003 | while ((newtag != NULL) && (ctxt->name != NULL) && |
---|
1004 | (docbCheckAutoClose(newtag, ctxt->name))) { |
---|
1005 | #ifdef DEBUG |
---|
1006 | xmlGenericError(xmlGenericErrorContext,"docbAutoClose: %s closes %s\n", newtag, ctxt->name); |
---|
1007 | #endif |
---|
1008 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
1009 | ctxt->sax->endElement(ctxt->userData, ctxt->name); |
---|
1010 | oldname = docbnamePop(ctxt); |
---|
1011 | if (oldname != NULL) { |
---|
1012 | #ifdef DEBUG |
---|
1013 | xmlGenericError(xmlGenericErrorContext,"docbAutoClose: popped %s\n", oldname); |
---|
1014 | #endif |
---|
1015 | xmlFree(oldname); |
---|
1016 | } |
---|
1017 | } |
---|
1018 | } |
---|
1019 | |
---|
1020 | /** |
---|
1021 | * docbAutoCloseTag: |
---|
1022 | * @doc: the SGML document |
---|
1023 | * @name: The tag name |
---|
1024 | * @elem: the SGML element |
---|
1025 | * |
---|
1026 | * The DocBook DTD allows a tag to implicitly close other tags. |
---|
1027 | * The list is kept in docbStartClose array. This function checks |
---|
1028 | * if the element or one of it's children would autoclose the |
---|
1029 | * given tag. |
---|
1030 | * |
---|
1031 | * Returns 1 if autoclose, 0 otherwise |
---|
1032 | */ |
---|
1033 | static int |
---|
1034 | docbAutoCloseTag(docbDocPtr doc, const xmlChar *name, docbNodePtr elem) { |
---|
1035 | docbNodePtr child; |
---|
1036 | |
---|
1037 | if (elem == NULL) return(1); |
---|
1038 | if (xmlStrEqual(name, elem->name)) return(0); |
---|
1039 | if (docbCheckAutoClose(elem->name, name)) return(1); |
---|
1040 | child = elem->children; |
---|
1041 | while (child != NULL) { |
---|
1042 | if (docbAutoCloseTag(doc, name, child)) return(1); |
---|
1043 | child = child->next; |
---|
1044 | } |
---|
1045 | return(0); |
---|
1046 | } |
---|
1047 | |
---|
1048 | /************************************************************************ |
---|
1049 | * * |
---|
1050 | * The list of SGML predefined entities * |
---|
1051 | * * |
---|
1052 | ************************************************************************/ |
---|
1053 | |
---|
1054 | |
---|
1055 | static docbEntityDesc |
---|
1056 | docbookEntitiesTable[] = { |
---|
1057 | /* |
---|
1058 | * the 4 absolute ones, plus apostrophe. |
---|
1059 | */ |
---|
1060 | { 0x0026, "amp", "AMPERSAND" }, |
---|
1061 | { 0x003C, "lt", "LESS-THAN SIGN" }, |
---|
1062 | |
---|
1063 | /* |
---|
1064 | * Converted with VI macros from docbook ent files |
---|
1065 | */ |
---|
1066 | { 0x0021, "excl", "EXCLAMATION MARK" }, |
---|
1067 | { 0x0022, "quot", "QUOTATION MARK" }, |
---|
1068 | { 0x0023, "num", "NUMBER SIGN" }, |
---|
1069 | { 0x0024, "dollar", "DOLLAR SIGN" }, |
---|
1070 | { 0x0025, "percnt", "PERCENT SIGN" }, |
---|
1071 | { 0x0027, "apos", "APOSTROPHE" }, |
---|
1072 | { 0x0028, "lpar", "LEFT PARENTHESIS" }, |
---|
1073 | { 0x0029, "rpar", "RIGHT PARENTHESIS" }, |
---|
1074 | { 0x002A, "ast", "ASTERISK OPERATOR" }, |
---|
1075 | { 0x002B, "plus", "PLUS SIGN" }, |
---|
1076 | { 0x002C, "comma", "COMMA" }, |
---|
1077 | { 0x002D, "hyphen", "HYPHEN-MINUS" }, |
---|
1078 | { 0x002E, "period", "FULL STOP" }, |
---|
1079 | { 0x002F, "sol", "SOLIDUS" }, |
---|
1080 | { 0x003A, "colon", "COLON" }, |
---|
1081 | { 0x003B, "semi", "SEMICOLON" }, |
---|
1082 | { 0x003D, "equals", "EQUALS SIGN" }, |
---|
1083 | { 0x003E, "gt", "GREATER-THAN SIGN" }, |
---|
1084 | { 0x003F, "quest", "QUESTION MARK" }, |
---|
1085 | { 0x0040, "commat", "COMMERCIAL AT" }, |
---|
1086 | { 0x005B, "lsqb", "LEFT SQUARE BRACKET" }, |
---|
1087 | { 0x005C, "bsol", "REVERSE SOLIDUS" }, |
---|
1088 | { 0x005D, "rsqb", "RIGHT SQUARE BRACKET" }, |
---|
1089 | { 0x005E, "circ", "RING OPERATOR" }, |
---|
1090 | { 0x005F, "lowbar", "LOW LINE" }, |
---|
1091 | { 0x0060, "grave", "GRAVE ACCENT" }, |
---|
1092 | { 0x007B, "lcub", "LEFT CURLY BRACKET" }, |
---|
1093 | { 0x007C, "verbar", "VERTICAL LINE" }, |
---|
1094 | { 0x007D, "rcub", "RIGHT CURLY BRACKET" }, |
---|
1095 | { 0x00A0, "nbsp", "NO-BREAK SPACE" }, |
---|
1096 | { 0x00A1, "iexcl", "INVERTED EXCLAMATION MARK" }, |
---|
1097 | { 0x00A2, "cent", "CENT SIGN" }, |
---|
1098 | { 0x00A3, "pound", "POUND SIGN" }, |
---|
1099 | { 0x00A4, "curren", "CURRENCY SIGN" }, |
---|
1100 | { 0x00A5, "yen", "YEN SIGN" }, |
---|
1101 | { 0x00A6, "brvbar", "BROKEN BAR" }, |
---|
1102 | { 0x00A7, "sect", "SECTION SIGN" }, |
---|
1103 | { 0x00A8, "die", "" }, |
---|
1104 | { 0x00A8, "Dot", "" }, |
---|
1105 | { 0x00A8, "uml", "" }, |
---|
1106 | { 0x00A9, "copy", "COPYRIGHT SIGN" }, |
---|
1107 | { 0x00AA, "ordf", "FEMININE ORDINAL INDICATOR" }, |
---|
1108 | { 0x00AB, "laquo", "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK" }, |
---|
1109 | { 0x00AC, "not", "NOT SIGN" }, |
---|
1110 | { 0x00AD, "shy", "SOFT HYPHEN" }, |
---|
1111 | { 0x00AE, "reg", "REG TRADE MARK SIGN" }, |
---|
1112 | { 0x00AF, "macr", "MACRON" }, |
---|
1113 | { 0x00B0, "deg", "DEGREE SIGN" }, |
---|
1114 | { 0x00B1, "plusmn", "PLUS-MINUS SIGN" }, |
---|
1115 | { 0x00B2, "sup2", "SUPERSCRIPT TWO" }, |
---|
1116 | { 0x00B3, "sup3", "SUPERSCRIPT THREE" }, |
---|
1117 | { 0x00B4, "acute", "ACUTE ACCENT" }, |
---|
1118 | { 0x00B5, "micro", "MICRO SIGN" }, |
---|
1119 | { 0x00B6, "para", "PILCROW SIGN" }, |
---|
1120 | { 0x00B7, "middot", "MIDDLE DOT" }, |
---|
1121 | { 0x00B8, "cedil", "CEDILLA" }, |
---|
1122 | { 0x00B9, "sup1", "SUPERSCRIPT ONE" }, |
---|
1123 | { 0x00BA, "ordm", "MASCULINE ORDINAL INDICATOR" }, |
---|
1124 | { 0x00BB, "raquo", "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK" }, |
---|
1125 | { 0x00BC, "frac14", "VULGAR FRACTION ONE QUARTER" }, |
---|
1126 | { 0x00BD, "frac12", "VULGAR FRACTION ONE HALF" }, |
---|
1127 | { 0x00BD, "half", "VULGAR FRACTION ONE HALF" }, |
---|
1128 | { 0x00BE, "frac34", "VULGAR FRACTION THREE QUARTERS" }, |
---|
1129 | { 0x00BF, "iquest", "INVERTED QUESTION MARK" }, |
---|
1130 | { 0x00C0, "Agrave", "LATIN CAPITAL LETTER A WITH GRAVE" }, |
---|
1131 | { 0x00C1, "Aacute", "LATIN CAPITAL LETTER A WITH ACUTE" }, |
---|
1132 | { 0x00C2, "Acirc", "LATIN CAPITAL LETTER A WITH CIRCUMFLEX" }, |
---|
1133 | { 0x00C3, "Atilde", "LATIN CAPITAL LETTER A WITH TILDE" }, |
---|
1134 | { 0x00C4, "Auml", "LATIN CAPITAL LETTER A WITH DIAERESIS" }, |
---|
1135 | { 0x00C5, "Aring", "LATIN CAPITAL LETTER A WITH RING ABOVE" }, |
---|
1136 | { 0x00C6, "AElig", "LATIN CAPITAL LETTER AE" }, |
---|
1137 | { 0x00C7, "Ccedil", "LATIN CAPITAL LETTER C WITH CEDILLA" }, |
---|
1138 | { 0x00C8, "Egrave", "LATIN CAPITAL LETTER E WITH GRAVE" }, |
---|
1139 | { 0x00C9, "Eacute", "LATIN CAPITAL LETTER E WITH ACUTE" }, |
---|
1140 | { 0x00CA, "Ecirc", "LATIN CAPITAL LETTER E WITH CIRCUMFLEX" }, |
---|
1141 | { 0x00CB, "Euml", "LATIN CAPITAL LETTER E WITH DIAERESIS" }, |
---|
1142 | { 0x00CC, "Igrave", "LATIN CAPITAL LETTER I WITH GRAVE" }, |
---|
1143 | { 0x00CD, "Iacute", "LATIN CAPITAL LETTER I WITH ACUTE" }, |
---|
1144 | { 0x00CE, "Icirc", "LATIN CAPITAL LETTER I WITH CIRCUMFLEX" }, |
---|
1145 | { 0x00CF, "Iuml", "LATIN CAPITAL LETTER I WITH DIAERESIS" }, |
---|
1146 | { 0x00D0, "ETH", "LATIN CAPITAL LETTER ETH" }, |
---|
1147 | { 0x00D1, "Ntilde", "LATIN CAPITAL LETTER N WITH TILDE" }, |
---|
1148 | { 0x00D2, "Ograve", "LATIN CAPITAL LETTER O WITH GRAVE" }, |
---|
1149 | { 0x00D3, "Oacute", "LATIN CAPITAL LETTER O WITH ACUTE" }, |
---|
1150 | { 0x00D4, "Ocirc", "LATIN CAPITAL LETTER O WITH CIRCUMFLEX" }, |
---|
1151 | { 0x00D5, "Otilde", "LATIN CAPITAL LETTER O WITH TILDE" }, |
---|
1152 | { 0x00D6, "Ouml", "LATIN CAPITAL LETTER O WITH DIAERESIS" }, |
---|
1153 | { 0x00D7, "times", "MULTIPLICATION SIGN" }, |
---|
1154 | { 0x00D8, "Oslash", "LATIN CAPITAL LETTER O WITH STROKE" }, |
---|
1155 | { 0x00D9, "Ugrave", "LATIN CAPITAL LETTER U WITH GRAVE" }, |
---|
1156 | { 0x00DA, "Uacute", "LATIN CAPITAL LETTER U WITH ACUTE" }, |
---|
1157 | { 0x00DB, "Ucirc", "LATIN CAPITAL LETTER U WITH CIRCUMFLEX" }, |
---|
1158 | { 0x00DC, "Uuml", "LATIN CAPITAL LETTER U WITH DIAERESIS" }, |
---|
1159 | { 0x00DD, "Yacute", "LATIN CAPITAL LETTER Y WITH ACUTE" }, |
---|
1160 | { 0x00DE, "THORN", "LATIN CAPITAL LETTER THORN" }, |
---|
1161 | { 0x00DF, "szlig", "LATIN SMALL LETTER SHARP S" }, |
---|
1162 | { 0x00E0, "agrave", "LATIN SMALL LETTER A WITH GRAVE" }, |
---|
1163 | { 0x00E1, "aacute", "LATIN SMALL LETTER A WITH ACUTE" }, |
---|
1164 | { 0x00E2, "acirc", "LATIN SMALL LETTER A WITH CIRCUMFLEX" }, |
---|
1165 | { 0x00E3, "atilde", "LATIN SMALL LETTER A WITH TILDE" }, |
---|
1166 | { 0x00E4, "auml", "LATIN SMALL LETTER A WITH DIAERESIS" }, |
---|
1167 | { 0x00E5, "aring", "LATIN SMALL LETTER A WITH RING ABOVE" }, |
---|
1168 | { 0x00E6, "aelig", "LATIN SMALL LETTER AE" }, |
---|
1169 | { 0x00E7, "ccedil", "LATIN SMALL LETTER C WITH CEDILLA" }, |
---|
1170 | { 0x00E8, "egrave", "LATIN SMALL LETTER E WITH GRAVE" }, |
---|
1171 | { 0x00E9, "eacute", "LATIN SMALL LETTER E WITH ACUTE" }, |
---|
1172 | { 0x00EA, "ecirc", "LATIN SMALL LETTER E WITH CIRCUMFLEX" }, |
---|
1173 | { 0x00EB, "euml", "LATIN SMALL LETTER E WITH DIAERESIS" }, |
---|
1174 | { 0x00EC, "igrave", "LATIN SMALL LETTER I WITH GRAVE" }, |
---|
1175 | { 0x00ED, "iacute", "LATIN SMALL LETTER I WITH ACUTE" }, |
---|
1176 | { 0x00EE, "icirc", "LATIN SMALL LETTER I WITH CIRCUMFLEX" }, |
---|
1177 | { 0x00EF, "iuml", "LATIN SMALL LETTER I WITH DIAERESIS" }, |
---|
1178 | { 0x00F0, "eth", "LATIN SMALL LETTER ETH" }, |
---|
1179 | { 0x00F1, "ntilde", "LATIN SMALL LETTER N WITH TILDE" }, |
---|
1180 | { 0x00F2, "ograve", "LATIN SMALL LETTER O WITH GRAVE" }, |
---|
1181 | { 0x00F3, "oacute", "LATIN SMALL LETTER O WITH ACUTE" }, |
---|
1182 | { 0x00F4, "ocirc", "LATIN SMALL LETTER O WITH CIRCUMFLEX" }, |
---|
1183 | { 0x00F5, "otilde", "LATIN SMALL LETTER O WITH TILDE" }, |
---|
1184 | { 0x00F6, "ouml", "LATIN SMALL LETTER O WITH DIAERESIS" }, |
---|
1185 | { 0x00F7, "divide", "DIVISION SIGN" }, |
---|
1186 | { 0x00F8, "oslash", "CIRCLED DIVISION SLASH" }, |
---|
1187 | { 0x00F9, "ugrave", "LATIN SMALL LETTER U WITH GRAVE" }, |
---|
1188 | { 0x00FA, "uacute", "LATIN SMALL LETTER U WITH ACUTE" }, |
---|
1189 | { 0x00FB, "ucirc", "LATIN SMALL LETTER U WITH CIRCUMFLEX" }, |
---|
1190 | { 0x00FC, "uuml", "LATIN SMALL LETTER U WITH DIAERESIS" }, |
---|
1191 | { 0x00FD, "yacute", "LATIN SMALL LETTER Y WITH ACUTE" }, |
---|
1192 | { 0x00FE, "thorn", "LATIN SMALL LETTER THORN" }, |
---|
1193 | { 0x00FF, "yuml", "LATIN SMALL LETTER Y WITH DIAERESIS" }, |
---|
1194 | { 0x0100, "Amacr", "LATIN CAPITAL LETTER A WITH MACRON" }, |
---|
1195 | { 0x0101, "amacr", "LATIN SMALL LETTER A WITH MACRON" }, |
---|
1196 | { 0x0102, "Abreve", "LATIN CAPITAL LETTER A WITH BREVE" }, |
---|
1197 | { 0x0103, "abreve", "LATIN SMALL LETTER A WITH BREVE" }, |
---|
1198 | { 0x0104, "Aogon", "LATIN CAPITAL LETTER A WITH OGONEK" }, |
---|
1199 | { 0x0105, "aogon", "LATIN SMALL LETTER A WITH OGONEK" }, |
---|
1200 | { 0x0106, "Cacute", "LATIN CAPITAL LETTER C WITH ACUTE" }, |
---|
1201 | { 0x0107, "cacute", "LATIN SMALL LETTER C WITH ACUTE" }, |
---|
1202 | { 0x0108, "Ccirc", "LATIN CAPITAL LETTER C WITH CIRCUMFLEX" }, |
---|
1203 | { 0x0109, "ccirc", "LATIN SMALL LETTER C WITH CIRCUMFLEX" }, |
---|
1204 | { 0x010A, "Cdot", "LATIN CAPITAL LETTER C WITH DOT ABOVE" }, |
---|
1205 | { 0x010B, "cdot", "DOT OPERATOR" }, |
---|
1206 | { 0x010C, "Ccaron", "LATIN CAPITAL LETTER C WITH CARON" }, |
---|
1207 | { 0x010D, "ccaron", "LATIN SMALL LETTER C WITH CARON" }, |
---|
1208 | { 0x010E, "Dcaron", "LATIN CAPITAL LETTER D WITH CARON" }, |
---|
1209 | { 0x010F, "dcaron", "LATIN SMALL LETTER D WITH CARON" }, |
---|
1210 | { 0x0110, "Dstrok", "LATIN CAPITAL LETTER D WITH STROKE" }, |
---|
1211 | { 0x0111, "dstrok", "LATIN SMALL LETTER D WITH STROKE" }, |
---|
1212 | { 0x0112, "Emacr", "LATIN CAPITAL LETTER E WITH MACRON" }, |
---|
1213 | { 0x0113, "emacr", "LATIN SMALL LETTER E WITH MACRON" }, |
---|
1214 | { 0x0116, "Edot", "LATIN CAPITAL LETTER E WITH DOT ABOVE" }, |
---|
1215 | { 0x0117, "edot", "LATIN SMALL LETTER E WITH DOT ABOVE" }, |
---|
1216 | { 0x0118, "Eogon", "LATIN CAPITAL LETTER E WITH OGONEK" }, |
---|
1217 | { 0x0119, "eogon", "LATIN SMALL LETTER E WITH OGONEK" }, |
---|
1218 | { 0x011A, "Ecaron", "LATIN CAPITAL LETTER E WITH CARON" }, |
---|
1219 | { 0x011B, "ecaron", "LATIN SMALL LETTER E WITH CARON" }, |
---|
1220 | { 0x011C, "Gcirc", "LATIN CAPITAL LETTER G WITH CIRCUMFLEX" }, |
---|
1221 | { 0x011D, "gcirc", "LATIN SMALL LETTER G WITH CIRCUMFLEX" }, |
---|
1222 | { 0x011E, "Gbreve", "LATIN CAPITAL LETTER G WITH BREVE" }, |
---|
1223 | { 0x011F, "gbreve", "LATIN SMALL LETTER G WITH BREVE" }, |
---|
1224 | { 0x0120, "Gdot", "LATIN CAPITAL LETTER G WITH DOT ABOVE" }, |
---|
1225 | { 0x0121, "gdot", "LATIN SMALL LETTER G WITH DOT ABOVE" }, |
---|
1226 | { 0x0122, "Gcedil", "LATIN CAPITAL LETTER G WITH CEDILLA" }, |
---|
1227 | { 0x0124, "Hcirc", "LATIN CAPITAL LETTER H WITH CIRCUMFLEX" }, |
---|
1228 | { 0x0125, "hcirc", "LATIN SMALL LETTER H WITH CIRCUMFLEX" }, |
---|
1229 | { 0x0126, "Hstrok", "LATIN CAPITAL LETTER H WITH STROKE" }, |
---|
1230 | { 0x0127, "hstrok", "LATIN SMALL LETTER H WITH STROKE" }, |
---|
1231 | { 0x0128, "Itilde", "LATIN CAPITAL LETTER I WITH TILDE" }, |
---|
1232 | { 0x0129, "itilde", "LATIN SMALL LETTER I WITH TILDE" }, |
---|
1233 | { 0x012A, "Imacr", "LATIN CAPITAL LETTER I WITH MACRON" }, |
---|
1234 | { 0x012B, "imacr", "LATIN SMALL LETTER I WITH MACRON" }, |
---|
1235 | { 0x012E, "Iogon", "LATIN CAPITAL LETTER I WITH OGONEK" }, |
---|
1236 | { 0x012F, "iogon", "LATIN SMALL LETTER I WITH OGONEK" }, |
---|
1237 | { 0x0130, "Idot", "LATIN CAPITAL LETTER I WITH DOT ABOVE" }, |
---|
1238 | { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" }, |
---|
1239 | { 0x0131, "inodot", "LATIN SMALL LETTER DOTLESS I" }, |
---|
1240 | { 0x0132, "IJlig", "LATIN CAPITAL LIGATURE IJ" }, |
---|
1241 | { 0x0133, "ijlig", "LATIN SMALL LIGATURE IJ" }, |
---|
1242 | { 0x0134, "Jcirc", "LATIN CAPITAL LETTER J WITH CIRCUMFLEX" }, |
---|
1243 | { 0x0135, "jcirc", "LATIN SMALL LETTER J WITH CIRCUMFLEX" }, |
---|
1244 | { 0x0136, "Kcedil", "LATIN CAPITAL LETTER K WITH CEDILLA" }, |
---|
1245 | { 0x0137, "kcedil", "LATIN SMALL LETTER K WITH CEDILLA" }, |
---|
1246 | { 0x0138, "kgreen", "LATIN SMALL LETTER KRA" }, |
---|
1247 | { 0x0139, "Lacute", "LATIN CAPITAL LETTER L WITH ACUTE" }, |
---|
1248 | { 0x013A, "lacute", "LATIN SMALL LETTER L WITH ACUTE" }, |
---|
1249 | { 0x013B, "Lcedil", "LATIN CAPITAL LETTER L WITH CEDILLA" }, |
---|
1250 | { 0x013C, "lcedil", "LATIN SMALL LETTER L WITH CEDILLA" }, |
---|
1251 | { 0x013D, "Lcaron", "LATIN CAPITAL LETTER L WITH CARON" }, |
---|
1252 | { 0x013E, "lcaron", "LATIN SMALL LETTER L WITH CARON" }, |
---|
1253 | { 0x013F, "Lmidot", "LATIN CAPITAL LETTER L WITH MIDDLE DOT" }, |
---|
1254 | { 0x0140, "lmidot", "LATIN SMALL LETTER L WITH MIDDLE DOT" }, |
---|
1255 | { 0x0141, "Lstrok", "LATIN CAPITAL LETTER L WITH STROKE" }, |
---|
1256 | { 0x0142, "lstrok", "LATIN SMALL LETTER L WITH STROKE" }, |
---|
1257 | { 0x0143, "Nacute", "LATIN CAPITAL LETTER N WITH ACUTE" }, |
---|
1258 | { 0x0144, "nacute", "LATIN SMALL LETTER N WITH ACUTE" }, |
---|
1259 | { 0x0145, "Ncedil", "LATIN CAPITAL LETTER N WITH CEDILLA" }, |
---|
1260 | { 0x0146, "ncedil", "LATIN SMALL LETTER N WITH CEDILLA" }, |
---|
1261 | { 0x0147, "Ncaron", "LATIN CAPITAL LETTER N WITH CARON" }, |
---|
1262 | { 0x0148, "ncaron", "LATIN SMALL LETTER N WITH CARON" }, |
---|
1263 | { 0x0149, "napos", "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE" }, |
---|
1264 | { 0x014A, "ENG", "LATIN CAPITAL LETTER ENG" }, |
---|
1265 | { 0x014B, "eng", "LATIN SMALL LETTER ENG" }, |
---|
1266 | { 0x014C, "Omacr", "LATIN CAPITAL LETTER O WITH MACRON" }, |
---|
1267 | { 0x014D, "omacr", "LATIN SMALL LETTER O WITH MACRON" }, |
---|
1268 | { 0x0150, "Odblac", "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE" }, |
---|
1269 | { 0x0151, "odblac", "LATIN SMALL LETTER O WITH DOUBLE ACUTE" }, |
---|
1270 | { 0x0152, "OElig", "LATIN CAPITAL LIGATURE OE" }, |
---|
1271 | { 0x0153, "oelig", "LATIN SMALL LIGATURE OE" }, |
---|
1272 | { 0x0154, "Racute", "LATIN CAPITAL LETTER R WITH ACUTE" }, |
---|
1273 | { 0x0155, "racute", "LATIN SMALL LETTER R WITH ACUTE" }, |
---|
1274 | { 0x0156, "Rcedil", "LATIN CAPITAL LETTER R WITH CEDILLA" }, |
---|
1275 | { 0x0157, "rcedil", "LATIN SMALL LETTER R WITH CEDILLA" }, |
---|
1276 | { 0x0158, "Rcaron", "LATIN CAPITAL LETTER R WITH CARON" }, |
---|
1277 | { 0x0159, "rcaron", "LATIN SMALL LETTER R WITH CARON" }, |
---|
1278 | { 0x015A, "Sacute", "LATIN CAPITAL LETTER S WITH ACUTE" }, |
---|
1279 | { 0x015B, "sacute", "LATIN SMALL LETTER S WITH ACUTE" }, |
---|
1280 | { 0x015C, "Scirc", "LATIN CAPITAL LETTER S WITH CIRCUMFLEX" }, |
---|
1281 | { 0x015D, "scirc", "LATIN SMALL LETTER S WITH CIRCUMFLEX" }, |
---|
1282 | { 0x015E, "Scedil", "LATIN CAPITAL LETTER S WITH CEDILLA" }, |
---|
1283 | { 0x015F, "scedil", "LATIN SMALL LETTER S WITH CEDILLA" }, |
---|
1284 | { 0x0160, "Scaron", "LATIN CAPITAL LETTER S WITH CARON" }, |
---|
1285 | { 0x0161, "scaron", "LATIN SMALL LETTER S WITH CARON" }, |
---|
1286 | { 0x0162, "Tcedil", "LATIN CAPITAL LETTER T WITH CEDILLA" }, |
---|
1287 | { 0x0163, "tcedil", "LATIN SMALL LETTER T WITH CEDILLA" }, |
---|
1288 | { 0x0164, "Tcaron", "LATIN CAPITAL LETTER T WITH CARON" }, |
---|
1289 | { 0x0165, "tcaron", "LATIN SMALL LETTER T WITH CARON" }, |
---|
1290 | { 0x0166, "Tstrok", "LATIN CAPITAL LETTER T WITH STROKE" }, |
---|
1291 | { 0x0167, "tstrok", "LATIN SMALL LETTER T WITH STROKE" }, |
---|
1292 | { 0x0168, "Utilde", "LATIN CAPITAL LETTER U WITH TILDE" }, |
---|
1293 | { 0x0169, "utilde", "LATIN SMALL LETTER U WITH TILDE" }, |
---|
1294 | { 0x016A, "Umacr", "LATIN CAPITAL LETTER U WITH MACRON" }, |
---|
1295 | { 0x016B, "umacr", "LATIN SMALL LETTER U WITH MACRON" }, |
---|
1296 | { 0x016C, "Ubreve", "LATIN CAPITAL LETTER U WITH BREVE" }, |
---|
1297 | { 0x016D, "ubreve", "LATIN SMALL LETTER U WITH BREVE" }, |
---|
1298 | { 0x016E, "Uring", "LATIN CAPITAL LETTER U WITH RING ABOVE" }, |
---|
1299 | { 0x016F, "uring", "LATIN SMALL LETTER U WITH RING ABOVE" }, |
---|
1300 | { 0x0170, "Udblac", "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE" }, |
---|
1301 | { 0x0171, "udblac", "LATIN SMALL LETTER U WITH DOUBLE ACUTE" }, |
---|
1302 | { 0x0172, "Uogon", "LATIN CAPITAL LETTER U WITH OGONEK" }, |
---|
1303 | { 0x0173, "uogon", "LATIN SMALL LETTER U WITH OGONEK" }, |
---|
1304 | { 0x0174, "Wcirc", "LATIN CAPITAL LETTER W WITH CIRCUMFLEX" }, |
---|
1305 | { 0x0175, "wcirc", "LATIN SMALL LETTER W WITH CIRCUMFLEX" }, |
---|
1306 | { 0x0176, "Ycirc", "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX" }, |
---|
1307 | { 0x0177, "ycirc", "LATIN SMALL LETTER Y WITH CIRCUMFLEX" }, |
---|
1308 | { 0x0178, "Yuml", "LATIN CAPITAL LETTER Y WITH DIAERESIS" }, |
---|
1309 | { 0x0179, "Zacute", "LATIN CAPITAL LETTER Z WITH ACUTE" }, |
---|
1310 | { 0x017A, "zacute", "LATIN SMALL LETTER Z WITH ACUTE" }, |
---|
1311 | { 0x017B, "Zdot", "LATIN CAPITAL LETTER Z WITH DOT ABOVE" }, |
---|
1312 | { 0x017C, "zdot", "LATIN SMALL LETTER Z WITH DOT ABOVE" }, |
---|
1313 | { 0x017D, "Zcaron", "LATIN CAPITAL LETTER Z WITH CARON" }, |
---|
1314 | { 0x017E, "zcaron", "LATIN SMALL LETTER Z WITH CARON" }, |
---|
1315 | { 0x0192, "fnof", "LATIN SMALL LETTER F WITH HOOK" }, |
---|
1316 | { 0x01F5, "gacute", "LATIN SMALL LETTER G WITH ACUTE" }, |
---|
1317 | { 0x02C7, "caron", "CARON" }, |
---|
1318 | { 0x02D8, "breve", "BREVE" }, |
---|
1319 | { 0x02D9, "dot", "DOT ABOVE" }, |
---|
1320 | { 0x02DA, "ring", "RING ABOVE" }, |
---|
1321 | { 0x02DB, "ogon", "OGONEK" }, |
---|
1322 | { 0x02DC, "tilde", "TILDE" }, |
---|
1323 | { 0x02DD, "dblac", "DOUBLE ACUTE ACCENT" }, |
---|
1324 | { 0x0386, "Aacgr", "GREEK CAPITAL LETTER ALPHA WITH TONOS" }, |
---|
1325 | { 0x0388, "Eacgr", "GREEK CAPITAL LETTER EPSILON WITH TONOS" }, |
---|
1326 | { 0x0389, "EEacgr", "GREEK CAPITAL LETTER ETA WITH TONOS" }, |
---|
1327 | { 0x038A, "Iacgr", "GREEK CAPITAL LETTER IOTA WITH TONOS" }, |
---|
1328 | { 0x038C, "Oacgr", "GREEK CAPITAL LETTER OMICRON WITH TONOS" }, |
---|
1329 | { 0x038E, "Uacgr", "GREEK CAPITAL LETTER UPSILON WITH TONOS" }, |
---|
1330 | { 0x038F, "OHacgr", "GREEK CAPITAL LETTER OMEGA WITH TONOS" }, |
---|
1331 | { 0x0390, "idiagr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS" }, |
---|
1332 | { 0x0391, "Agr", "GREEK CAPITAL LETTER ALPHA" }, |
---|
1333 | { 0x0392, "Bgr", "GREEK CAPITAL LETTER BETA" }, |
---|
1334 | { 0x0393, "b.Gamma", "GREEK CAPITAL LETTER GAMMA" }, |
---|
1335 | { 0x0393, "Gamma", "GREEK CAPITAL LETTER GAMMA" }, |
---|
1336 | { 0x0393, "Ggr", "GREEK CAPITAL LETTER GAMMA" }, |
---|
1337 | { 0x0394, "b.Delta", "GREEK CAPITAL LETTER DELTA" }, |
---|
1338 | { 0x0394, "Delta", "GREEK CAPITAL LETTER DELTA" }, |
---|
1339 | { 0x0394, "Dgr", "GREEK CAPITAL LETTER DELTA" }, |
---|
1340 | { 0x0395, "Egr", "GREEK CAPITAL LETTER EPSILON" }, |
---|
1341 | { 0x0396, "Zgr", "GREEK CAPITAL LETTER ZETA" }, |
---|
1342 | { 0x0397, "EEgr", "GREEK CAPITAL LETTER ETA" }, |
---|
1343 | { 0x0398, "b.Theta", "GREEK CAPITAL LETTER THETA" }, |
---|
1344 | { 0x0398, "Theta", "GREEK CAPITAL LETTER THETA" }, |
---|
1345 | { 0x0398, "THgr", "GREEK CAPITAL LETTER THETA" }, |
---|
1346 | { 0x0399, "Igr", "GREEK CAPITAL LETTER IOTA" }, |
---|
1347 | { 0x039A, "Kgr", "GREEK CAPITAL LETTER KAPPA" }, |
---|
1348 | { 0x039B, "b.Lambda", "GREEK CAPITAL LETTER LAMDA" }, |
---|
1349 | { 0x039B, "Lambda", "GREEK CAPITAL LETTER LAMDA" }, |
---|
1350 | { 0x039B, "Lgr", "GREEK CAPITAL LETTER LAMDA" }, |
---|
1351 | { 0x039C, "Mgr", "GREEK CAPITAL LETTER MU" }, |
---|
1352 | { 0x039D, "Ngr", "GREEK CAPITAL LETTER NU" }, |
---|
1353 | { 0x039E, "b.Xi", "GREEK CAPITAL LETTER XI" }, |
---|
1354 | { 0x039E, "Xgr", "GREEK CAPITAL LETTER XI" }, |
---|
1355 | { 0x039E, "Xi", "GREEK CAPITAL LETTER XI" }, |
---|
1356 | { 0x039F, "Ogr", "GREEK CAPITAL LETTER OMICRON" }, |
---|
1357 | { 0x03A0, "b.Pi", "GREEK CAPITAL LETTER PI" }, |
---|
1358 | { 0x03A0, "Pgr", "GREEK CAPITAL LETTER PI" }, |
---|
1359 | { 0x03A0, "Pi", "GREEK CAPITAL LETTER PI" }, |
---|
1360 | { 0x03A1, "Rgr", "GREEK CAPITAL LETTER RHO" }, |
---|
1361 | { 0x03A3, "b.Sigma", "GREEK CAPITAL LETTER SIGMA" }, |
---|
1362 | { 0x03A3, "Sgr", "GREEK CAPITAL LETTER SIGMA" }, |
---|
1363 | { 0x03A3, "Sigma", "GREEK CAPITAL LETTER SIGMA" }, |
---|
1364 | { 0x03A4, "Tgr", "GREEK CAPITAL LETTER TAU" }, |
---|
1365 | { 0x03A5, "Ugr", "" }, |
---|
1366 | { 0x03A6, "b.Phi", "GREEK CAPITAL LETTER PHI" }, |
---|
1367 | { 0x03A6, "PHgr", "GREEK CAPITAL LETTER PHI" }, |
---|
1368 | { 0x03A6, "Phi", "GREEK CAPITAL LETTER PHI" }, |
---|
1369 | { 0x03A7, "KHgr", "GREEK CAPITAL LETTER CHI" }, |
---|
1370 | { 0x03A8, "b.Psi", "GREEK CAPITAL LETTER PSI" }, |
---|
1371 | { 0x03A8, "PSgr", "GREEK CAPITAL LETTER PSI" }, |
---|
1372 | { 0x03A8, "Psi", "GREEK CAPITAL LETTER PSI" }, |
---|
1373 | { 0x03A9, "b.Omega", "GREEK CAPITAL LETTER OMEGA" }, |
---|
1374 | { 0x03A9, "OHgr", "GREEK CAPITAL LETTER OMEGA" }, |
---|
1375 | { 0x03A9, "Omega", "GREEK CAPITAL LETTER OMEGA" }, |
---|
1376 | { 0x03AA, "Idigr", "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA" }, |
---|
1377 | { 0x03AB, "Udigr", "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA" }, |
---|
1378 | { 0x03AC, "aacgr", "GREEK SMALL LETTER ALPHA WITH TONOS" }, |
---|
1379 | { 0x03AD, "eacgr", "GREEK SMALL LETTER EPSILON WITH TONOS" }, |
---|
1380 | { 0x03AE, "eeacgr", "GREEK SMALL LETTER ETA WITH TONOS" }, |
---|
1381 | { 0x03AF, "iacgr", "GREEK SMALL LETTER IOTA WITH TONOS" }, |
---|
1382 | { 0x03B0, "udiagr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS" }, |
---|
1383 | { 0x03B1, "agr", "" }, |
---|
1384 | { 0x03B1, "alpha", "" }, |
---|
1385 | { 0x03B1, "b.alpha", "" }, |
---|
1386 | { 0x03B2, "b.beta", "GREEK SMALL LETTER BETA" }, |
---|
1387 | { 0x03B2, "beta", "GREEK SMALL LETTER BETA" }, |
---|
1388 | { 0x03B2, "bgr", "GREEK SMALL LETTER BETA" }, |
---|
1389 | { 0x03B3, "b.gamma", "GREEK SMALL LETTER GAMMA" }, |
---|
1390 | { 0x03B3, "gamma", "GREEK SMALL LETTER GAMMA" }, |
---|
1391 | { 0x03B3, "ggr", "GREEK SMALL LETTER GAMMA" }, |
---|
1392 | { 0x03B4, "b.delta", "GREEK SMALL LETTER DELTA" }, |
---|
1393 | { 0x03B4, "delta", "GREEK SMALL LETTER DELTA" }, |
---|
1394 | { 0x03B4, "dgr", "GREEK SMALL LETTER DELTA" }, |
---|
1395 | { 0x03B5, "b.epsi", "" }, |
---|
1396 | { 0x03B5, "b.epsis", "" }, |
---|
1397 | { 0x03B5, "b.epsiv", "" }, |
---|
1398 | { 0x03B5, "egr", "" }, |
---|
1399 | { 0x03B5, "epsiv", "" }, |
---|
1400 | { 0x03B6, "b.zeta", "GREEK SMALL LETTER ZETA" }, |
---|
1401 | { 0x03B6, "zeta", "GREEK SMALL LETTER ZETA" }, |
---|
1402 | { 0x03B6, "zgr", "GREEK SMALL LETTER ZETA" }, |
---|
1403 | { 0x03B7, "b.eta", "GREEK SMALL LETTER ETA" }, |
---|
1404 | { 0x03B7, "eegr", "GREEK SMALL LETTER ETA" }, |
---|
1405 | { 0x03B7, "eta", "GREEK SMALL LETTER ETA" }, |
---|
1406 | { 0x03B8, "b.thetas", "" }, |
---|
1407 | { 0x03B8, "thetas", "" }, |
---|
1408 | { 0x03B8, "thgr", "" }, |
---|
1409 | { 0x03B9, "b.iota", "GREEK SMALL LETTER IOTA" }, |
---|
1410 | { 0x03B9, "igr", "GREEK SMALL LETTER IOTA" }, |
---|
1411 | { 0x03B9, "iota", "GREEK SMALL LETTER IOTA" }, |
---|
1412 | { 0x03BA, "b.kappa", "GREEK SMALL LETTER KAPPA" }, |
---|
1413 | { 0x03BA, "kappa", "GREEK SMALL LETTER KAPPA" }, |
---|
1414 | { 0x03BA, "kgr", "GREEK SMALL LETTER KAPPA" }, |
---|
1415 | { 0x03BB, "b.lambda", "GREEK SMALL LETTER LAMDA" }, |
---|
1416 | { 0x03BB, "lambda", "GREEK SMALL LETTER LAMDA" }, |
---|
1417 | { 0x03BB, "lgr", "GREEK SMALL LETTER LAMDA" }, |
---|
1418 | { 0x03BC, "b.mu", "GREEK SMALL LETTER MU" }, |
---|
1419 | { 0x03BC, "mgr", "GREEK SMALL LETTER MU" }, |
---|
1420 | { 0x03BC, "mu", "GREEK SMALL LETTER MU" }, |
---|
1421 | { 0x03BD, "b.nu", "GREEK SMALL LETTER NU" }, |
---|
1422 | { 0x03BD, "ngr", "GREEK SMALL LETTER NU" }, |
---|
1423 | { 0x03BD, "nu", "GREEK SMALL LETTER NU" }, |
---|
1424 | { 0x03BE, "b.xi", "GREEK SMALL LETTER XI" }, |
---|
1425 | { 0x03BE, "xgr", "GREEK SMALL LETTER XI" }, |
---|
1426 | { 0x03BE, "xi", "GREEK SMALL LETTER XI" }, |
---|
1427 | { 0x03BF, "ogr", "GREEK SMALL LETTER OMICRON" }, |
---|
1428 | { 0x03C0, "b.pi", "GREEK SMALL LETTER PI" }, |
---|
1429 | { 0x03C0, "pgr", "GREEK SMALL LETTER PI" }, |
---|
1430 | { 0x03C0, "pi", "GREEK SMALL LETTER PI" }, |
---|
1431 | { 0x03C1, "b.rho", "GREEK SMALL LETTER RHO" }, |
---|
1432 | { 0x03C1, "rgr", "GREEK SMALL LETTER RHO" }, |
---|
1433 | { 0x03C1, "rho", "GREEK SMALL LETTER RHO" }, |
---|
1434 | { 0x03C2, "b.sigmav", "" }, |
---|
1435 | { 0x03C2, "sfgr", "" }, |
---|
1436 | { 0x03C2, "sigmav", "" }, |
---|
1437 | { 0x03C3, "b.sigma", "GREEK SMALL LETTER SIGMA" }, |
---|
1438 | { 0x03C3, "sgr", "GREEK SMALL LETTER SIGMA" }, |
---|
1439 | { 0x03C3, "sigma", "GREEK SMALL LETTER SIGMA" }, |
---|
1440 | { 0x03C4, "b.tau", "GREEK SMALL LETTER TAU" }, |
---|
1441 | { 0x03C4, "tau", "GREEK SMALL LETTER TAU" }, |
---|
1442 | { 0x03C4, "tgr", "GREEK SMALL LETTER TAU" }, |
---|
1443 | { 0x03C5, "b.upsi", "GREEK SMALL LETTER UPSILON" }, |
---|
1444 | { 0x03C5, "ugr", "GREEK SMALL LETTER UPSILON" }, |
---|
1445 | { 0x03C5, "upsi", "GREEK SMALL LETTER UPSILON" }, |
---|
1446 | { 0x03C6, "b.phis", "GREEK SMALL LETTER PHI" }, |
---|
1447 | { 0x03C6, "phgr", "GREEK SMALL LETTER PHI" }, |
---|
1448 | { 0x03C6, "phis", "GREEK SMALL LETTER PHI" }, |
---|
1449 | { 0x03C7, "b.chi", "GREEK SMALL LETTER CHI" }, |
---|
1450 | { 0x03C7, "chi", "GREEK SMALL LETTER CHI" }, |
---|
1451 | { 0x03C7, "khgr", "GREEK SMALL LETTER CHI" }, |
---|
1452 | { 0x03C8, "b.psi", "GREEK SMALL LETTER PSI" }, |
---|
1453 | { 0x03C8, "psgr", "GREEK SMALL LETTER PSI" }, |
---|
1454 | { 0x03C8, "psi", "GREEK SMALL LETTER PSI" }, |
---|
1455 | { 0x03C9, "b.omega", "GREEK SMALL LETTER OMEGA" }, |
---|
1456 | { 0x03C9, "ohgr", "GREEK SMALL LETTER OMEGA" }, |
---|
1457 | { 0x03C9, "omega", "GREEK SMALL LETTER OMEGA" }, |
---|
1458 | { 0x03CA, "idigr", "GREEK SMALL LETTER IOTA WITH DIALYTIKA" }, |
---|
1459 | { 0x03CB, "udigr", "GREEK SMALL LETTER UPSILON WITH DIALYTIKA" }, |
---|
1460 | { 0x03CC, "oacgr", "GREEK SMALL LETTER OMICRON WITH TONOS" }, |
---|
1461 | { 0x03CD, "uacgr", "GREEK SMALL LETTER UPSILON WITH TONOS" }, |
---|
1462 | { 0x03CE, "ohacgr", "GREEK SMALL LETTER OMEGA WITH TONOS" }, |
---|
1463 | { 0x03D1, "b.thetav", "" }, |
---|
1464 | { 0x03D1, "thetav", "" }, |
---|
1465 | { 0x03D2, "b.Upsi", "" }, |
---|
1466 | { 0x03D2, "Upsi", "" }, |
---|
1467 | { 0x03D5, "b.phiv", "GREEK PHI SYMBOL" }, |
---|
1468 | { 0x03D5, "phiv", "GREEK PHI SYMBOL" }, |
---|
1469 | { 0x03D6, "b.piv", "GREEK PI SYMBOL" }, |
---|
1470 | { 0x03D6, "piv", "GREEK PI SYMBOL" }, |
---|
1471 | { 0x03DC, "b.gammad", "GREEK LETTER DIGAMMA" }, |
---|
1472 | { 0x03DC, "gammad", "GREEK LETTER DIGAMMA" }, |
---|
1473 | { 0x03F0, "b.kappav", "GREEK KAPPA SYMBOL" }, |
---|
1474 | { 0x03F0, "kappav", "GREEK KAPPA SYMBOL" }, |
---|
1475 | { 0x03F1, "b.rhov", "GREEK RHO SYMBOL" }, |
---|
1476 | { 0x03F1, "rhov", "GREEK RHO SYMBOL" }, |
---|
1477 | { 0x0401, "IOcy", "CYRILLIC CAPITAL LETTER IO" }, |
---|
1478 | { 0x0402, "DJcy", "CYRILLIC CAPITAL LETTER DJE" }, |
---|
1479 | { 0x0403, "GJcy", "CYRILLIC CAPITAL LETTER GJE" }, |
---|
1480 | { 0x0404, "Jukcy", "CYRILLIC CAPITAL LETTER UKRAINIAN IE" }, |
---|
1481 | { 0x0405, "DScy", "CYRILLIC CAPITAL LETTER DZE" }, |
---|
1482 | { 0x0406, "Iukcy", "CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I" }, |
---|
1483 | { 0x0407, "YIcy", "CYRILLIC CAPITAL LETTER YI" }, |
---|
1484 | { 0x0408, "Jsercy", "CYRILLIC CAPITAL LETTER JE" }, |
---|
1485 | { 0x0409, "LJcy", "CYRILLIC CAPITAL LETTER LJE" }, |
---|
1486 | { 0x040A, "NJcy", "CYRILLIC CAPITAL LETTER NJE" }, |
---|
1487 | { 0x040B, "TSHcy", "CYRILLIC CAPITAL LETTER TSHE" }, |
---|
1488 | { 0x040C, "KJcy", "CYRILLIC CAPITAL LETTER KJE" }, |
---|
1489 | { 0x040E, "Ubrcy", "CYRILLIC CAPITAL LETTER SHORT U" }, |
---|
1490 | { 0x040F, "DZcy", "CYRILLIC CAPITAL LETTER DZHE" }, |
---|
1491 | { 0x0410, "Acy", "CYRILLIC CAPITAL LETTER A" }, |
---|
1492 | { 0x0411, "Bcy", "CYRILLIC CAPITAL LETTER BE" }, |
---|
1493 | { 0x0412, "Vcy", "CYRILLIC CAPITAL LETTER VE" }, |
---|
1494 | { 0x0413, "Gcy", "CYRILLIC CAPITAL LETTER GHE" }, |
---|
1495 | { 0x0414, "Dcy", "CYRILLIC CAPITAL LETTER DE" }, |
---|
1496 | { 0x0415, "IEcy", "CYRILLIC CAPITAL LETTER IE" }, |
---|
1497 | { 0x0416, "ZHcy", "CYRILLIC CAPITAL LETTER ZHE" }, |
---|
1498 | { 0x0417, "Zcy", "CYRILLIC CAPITAL LETTER ZE" }, |
---|
1499 | { 0x0418, "Icy", "CYRILLIC CAPITAL LETTER I" }, |
---|
1500 | { 0x0419, "Jcy", "CYRILLIC CAPITAL LETTER SHORT I" }, |
---|
1501 | { 0x041A, "Kcy", "CYRILLIC CAPITAL LETTER KA" }, |
---|
1502 | { 0x041B, "Lcy", "CYRILLIC CAPITAL LETTER EL" }, |
---|
1503 | { 0x041C, "Mcy", "CYRILLIC CAPITAL LETTER EM" }, |
---|
1504 | { 0x041D, "Ncy", "CYRILLIC CAPITAL LETTER EN" }, |
---|
1505 | { 0x041E, "Ocy", "CYRILLIC CAPITAL LETTER O" }, |
---|
1506 | { 0x041F, "Pcy", "CYRILLIC CAPITAL LETTER PE" }, |
---|
1507 | { 0x0420, "Rcy", "CYRILLIC CAPITAL LETTER ER" }, |
---|
1508 | { 0x0421, "Scy", "CYRILLIC CAPITAL LETTER ES" }, |
---|
1509 | { 0x0422, "Tcy", "CYRILLIC CAPITAL LETTER TE" }, |
---|
1510 | { 0x0423, "Ucy", "CYRILLIC CAPITAL LETTER U" }, |
---|
1511 | { 0x0424, "Fcy", "CYRILLIC CAPITAL LETTER EF" }, |
---|
1512 | { 0x0425, "KHcy", "CYRILLIC CAPITAL LETTER HA" }, |
---|
1513 | { 0x0426, "TScy", "CYRILLIC CAPITAL LETTER TSE" }, |
---|
1514 | { 0x0427, "CHcy", "CYRILLIC CAPITAL LETTER CHE" }, |
---|
1515 | { 0x0428, "SHcy", "CYRILLIC CAPITAL LETTER SHA" }, |
---|
1516 | { 0x0429, "SHCHcy", "CYRILLIC CAPITAL LETTER SHCHA" }, |
---|
1517 | { 0x042A, "HARDcy", "CYRILLIC CAPITAL LETTER HARD SIGN" }, |
---|
1518 | { 0x042B, "Ycy", "CYRILLIC CAPITAL LETTER YERU" }, |
---|
1519 | { 0x042C, "SOFTcy", "CYRILLIC CAPITAL LETTER SOFT SIGN" }, |
---|
1520 | { 0x042D, "Ecy", "CYRILLIC CAPITAL LETTER E" }, |
---|
1521 | { 0x042E, "YUcy", "CYRILLIC CAPITAL LETTER YU" }, |
---|
1522 | { 0x042F, "YAcy", "CYRILLIC CAPITAL LETTER YA" }, |
---|
1523 | { 0x0430, "acy", "CYRILLIC SMALL LETTER A" }, |
---|
1524 | { 0x0431, "bcy", "CYRILLIC SMALL LETTER BE" }, |
---|
1525 | { 0x0432, "vcy", "CYRILLIC SMALL LETTER VE" }, |
---|
1526 | { 0x0433, "gcy", "CYRILLIC SMALL LETTER GHE" }, |
---|
1527 | { 0x0434, "dcy", "CYRILLIC SMALL LETTER DE" }, |
---|
1528 | { 0x0435, "iecy", "CYRILLIC SMALL LETTER IE" }, |
---|
1529 | { 0x0436, "zhcy", "CYRILLIC SMALL LETTER ZHE" }, |
---|
1530 | { 0x0437, "zcy", "CYRILLIC SMALL LETTER ZE" }, |
---|
1531 | { 0x0438, "icy", "CYRILLIC SMALL LETTER I" }, |
---|
1532 | { 0x0439, "jcy", "CYRILLIC SMALL LETTER SHORT I" }, |
---|
1533 | { 0x043A, "kcy", "CYRILLIC SMALL LETTER KA" }, |
---|
1534 | { 0x043B, "lcy", "CYRILLIC SMALL LETTER EL" }, |
---|
1535 | { 0x043C, "mcy", "CYRILLIC SMALL LETTER EM" }, |
---|
1536 | { 0x043D, "ncy", "CYRILLIC SMALL LETTER EN" }, |
---|
1537 | { 0x043E, "ocy", "CYRILLIC SMALL LETTER O" }, |
---|
1538 | { 0x043F, "pcy", "CYRILLIC SMALL LETTER PE" }, |
---|
1539 | { 0x0440, "rcy", "CYRILLIC SMALL LETTER ER" }, |
---|
1540 | { 0x0441, "scy", "CYRILLIC SMALL LETTER ES" }, |
---|
1541 | { 0x0442, "tcy", "CYRILLIC SMALL LETTER TE" }, |
---|
1542 | { 0x0443, "ucy", "CYRILLIC SMALL LETTER U" }, |
---|
1543 | { 0x0444, "fcy", "CYRILLIC SMALL LETTER EF" }, |
---|
1544 | { 0x0445, "khcy", "CYRILLIC SMALL LETTER HA" }, |
---|
1545 | { 0x0446, "tscy", "CYRILLIC SMALL LETTER TSE" }, |
---|
1546 | { 0x0447, "chcy", "CYRILLIC SMALL LETTER CHE" }, |
---|
1547 | { 0x0448, "shcy", "CYRILLIC SMALL LETTER SHA" }, |
---|
1548 | { 0x0449, "shchcy", "CYRILLIC SMALL LETTER SHCHA" }, |
---|
1549 | { 0x044A, "hardcy", "CYRILLIC SMALL LETTER HARD SIGN" }, |
---|
1550 | { 0x044B, "ycy", "CYRILLIC SMALL LETTER YERU" }, |
---|
1551 | { 0x044C, "softcy", "CYRILLIC SMALL LETTER SOFT SIGN" }, |
---|
1552 | { 0x044D, "ecy", "CYRILLIC SMALL LETTER E" }, |
---|
1553 | { 0x044E, "yucy", "CYRILLIC SMALL LETTER YU" }, |
---|
1554 | { 0x044F, "yacy", "CYRILLIC SMALL LETTER YA" }, |
---|
1555 | { 0x0451, "iocy", "CYRILLIC SMALL LETTER IO" }, |
---|
1556 | { 0x0452, "djcy", "CYRILLIC SMALL LETTER DJE" }, |
---|
1557 | { 0x0453, "gjcy", "CYRILLIC SMALL LETTER GJE" }, |
---|
1558 | { 0x0454, "jukcy", "CYRILLIC SMALL LETTER UKRAINIAN IE" }, |
---|
1559 | { 0x0455, "dscy", "CYRILLIC SMALL LETTER DZE" }, |
---|
1560 | { 0x0456, "iukcy", "CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I" }, |
---|
1561 | { 0x0457, "yicy", "CYRILLIC SMALL LETTER YI" }, |
---|
1562 | { 0x0458, "jsercy", "CYRILLIC SMALL LETTER JE" }, |
---|
1563 | { 0x0459, "ljcy", "CYRILLIC SMALL LETTER LJE" }, |
---|
1564 | { 0x045A, "njcy", "CYRILLIC SMALL LETTER NJE" }, |
---|
1565 | { 0x045B, "tshcy", "CYRILLIC SMALL LETTER TSHE" }, |
---|
1566 | { 0x045C, "kjcy", "CYRILLIC SMALL LETTER KJE" }, |
---|
1567 | { 0x045E, "ubrcy", "CYRILLIC SMALL LETTER SHORT U" }, |
---|
1568 | { 0x045F, "dzcy", "CYRILLIC SMALL LETTER DZHE" }, |
---|
1569 | { 0x2002, "ensp", "EN SPACE" }, |
---|
1570 | { 0x2003, "emsp", "EM SPACE" }, |
---|
1571 | { 0x2004, "emsp13", "THREE-PER-EM SPACE" }, |
---|
1572 | { 0x2005, "emsp14", "FOUR-PER-EM SPACE" }, |
---|
1573 | { 0x2007, "numsp", "FIGURE SPACE" }, |
---|
1574 | { 0x2008, "puncsp", "PUNCTUATION SPACE" }, |
---|
1575 | { 0x2009, "thinsp", "THIN SPACE" }, |
---|
1576 | { 0x200A, "hairsp", "HAIR SPACE" }, |
---|
1577 | { 0x2010, "dash", "HYPHEN" }, |
---|
1578 | { 0x2013, "ndash", "EN DASH" }, |
---|
1579 | { 0x2014, "mdash", "EM DASH" }, |
---|
1580 | { 0x2015, "horbar", "HORIZONTAL BAR" }, |
---|
1581 | { 0x2016, "Verbar", "DOUBLE VERTICAL LINE" }, |
---|
1582 | { 0x2018, "lsquo", "" }, |
---|
1583 | { 0x2018, "rsquor", "" }, |
---|
1584 | { 0x2019, "rsquo", "RIGHT SINGLE QUOTATION MARK" }, |
---|
1585 | { 0x201A, "lsquor", "SINGLE LOW-9 QUOTATION MARK" }, |
---|
1586 | { 0x201C, "ldquo", "" }, |
---|
1587 | { 0x201C, "rdquor", "" }, |
---|
1588 | { 0x201D, "rdquo", "RIGHT DOUBLE QUOTATION MARK" }, |
---|
1589 | { 0x201E, "ldquor", "DOUBLE LOW-9 QUOTATION MARK" }, |
---|
1590 | { 0x2020, "dagger", "DAGGER" }, |
---|
1591 | { 0x2021, "Dagger", "DOUBLE DAGGER" }, |
---|
1592 | { 0x2022, "bull", "BULLET" }, |
---|
1593 | { 0x2025, "nldr", "TWO DOT LEADER" }, |
---|
1594 | { 0x2026, "hellip", "HORIZONTAL ELLIPSIS" }, |
---|
1595 | { 0x2026, "mldr", "HORIZONTAL ELLIPSIS" }, |
---|
1596 | { 0x2030, "permil", "PER MILLE SIGN" }, |
---|
1597 | { 0x2032, "prime", "PRIME" }, |
---|
1598 | { 0x2032, "vprime", "PRIME" }, |
---|
1599 | { 0x2033, "Prime", "DOUBLE PRIME" }, |
---|
1600 | { 0x2034, "tprime", "TRIPLE PRIME" }, |
---|
1601 | { 0x2035, "bprime", "REVERSED PRIME" }, |
---|
1602 | { 0x2041, "caret", "CARET" }, |
---|
1603 | { 0x2043, "hybull", "HYPHEN BULLET" }, |
---|
1604 | { 0x20DB, "tdot", "COMBINING THREE DOTS ABOVE" }, |
---|
1605 | { 0x20DC, "DotDot", "COMBINING FOUR DOTS ABOVE" }, |
---|
1606 | { 0x2105, "incare", "CARE OF" }, |
---|
1607 | { 0x210B, "hamilt", "SCRIPT CAPITAL H" }, |
---|
1608 | { 0x210F, "planck", "PLANCK CONSTANT OVER TWO PI" }, |
---|
1609 | { 0x2111, "image", "BLACK-LETTER CAPITAL I" }, |
---|
1610 | { 0x2112, "lagran", "SCRIPT CAPITAL L" }, |
---|
1611 | { 0x2113, "ell", "SCRIPT SMALL L" }, |
---|
1612 | { 0x2116, "numero", "NUMERO SIGN" }, |
---|
1613 | { 0x2117, "copysr", "SOUND RECORDING COPYRIGHT" }, |
---|
1614 | { 0x2118, "weierp", "SCRIPT CAPITAL P" }, |
---|
1615 | { 0x211C, "real", "BLACK-LETTER CAPITAL R" }, |
---|
1616 | { 0x211E, "rx", "PRESCRIPTION TAKE" }, |
---|
1617 | { 0x2122, "trade", "TRADE MARK SIGN" }, |
---|
1618 | { 0x2126, "ohm", "OHM SIGN" }, |
---|
1619 | { 0x212B, "angst", "ANGSTROM SIGN" }, |
---|
1620 | { 0x212C, "bernou", "SCRIPT CAPITAL B" }, |
---|
1621 | { 0x2133, "phmmat", "SCRIPT CAPITAL M" }, |
---|
1622 | { 0x2134, "order", "SCRIPT SMALL O" }, |
---|
1623 | { 0x2135, "aleph", "ALEF SYMBOL" }, |
---|
1624 | { 0x2136, "beth", "BET SYMBOL" }, |
---|
1625 | { 0x2137, "gimel", "GIMEL SYMBOL" }, |
---|
1626 | { 0x2138, "daleth", "DALET SYMBOL" }, |
---|
1627 | { 0x2153, "frac13", "VULGAR FRACTION ONE THIRD" }, |
---|
1628 | { 0x2154, "frac23", "VULGAR FRACTION TWO THIRDS" }, |
---|
1629 | { 0x2155, "frac15", "VULGAR FRACTION ONE FIFTH" }, |
---|
1630 | { 0x2156, "frac25", "VULGAR FRACTION TWO FIFTHS" }, |
---|
1631 | { 0x2157, "frac35", "VULGAR FRACTION THREE FIFTHS" }, |
---|
1632 | { 0x2158, "frac45", "VULGAR FRACTION FOUR FIFTHS" }, |
---|
1633 | { 0x2159, "frac16", "VULGAR FRACTION ONE SIXTH" }, |
---|
1634 | { 0x215A, "frac56", "VULGAR FRACTION FIVE SIXTHS" }, |
---|
1635 | { 0x215B, "frac18", "" }, |
---|
1636 | { 0x215C, "frac38", "" }, |
---|
1637 | { 0x215D, "frac58", "" }, |
---|
1638 | { 0x215E, "frac78", "" }, |
---|
1639 | { 0x2190, "larr", "LEFTWARDS DOUBLE ARROW" }, |
---|
1640 | { 0x2191, "uarr", "UPWARDS ARROW" }, |
---|
1641 | { 0x2192, "rarr", "RIGHTWARDS DOUBLE ARROW" }, |
---|
1642 | { 0x2193, "darr", "DOWNWARDS ARROW" }, |
---|
1643 | { 0x2194, "harr", "LEFT RIGHT ARROW" }, |
---|
1644 | { 0x2194, "xhArr", "LEFT RIGHT ARROW" }, |
---|
1645 | { 0x2194, "xharr", "LEFT RIGHT ARROW" }, |
---|
1646 | { 0x2195, "varr", "UP DOWN ARROW" }, |
---|
1647 | { 0x2196, "nwarr", "NORTH WEST ARROW" }, |
---|
1648 | { 0x2197, "nearr", "NORTH EAST ARROW" }, |
---|
1649 | { 0x2198, "drarr", "SOUTH EAST ARROW" }, |
---|
1650 | { 0x2199, "dlarr", "SOUTH WEST ARROW" }, |
---|
1651 | { 0x219A, "nlarr", "LEFTWARDS ARROW WITH STROKE" }, |
---|
1652 | { 0x219B, "nrarr", "RIGHTWARDS ARROW WITH STROKE" }, |
---|
1653 | { 0x219D, "rarrw", "RIGHTWARDS SQUIGGLE ARROW" }, |
---|
1654 | { 0x219E, "Larr", "LEFTWARDS TWO HEADED ARROW" }, |
---|
1655 | { 0x21A0, "Rarr", "RIGHTWARDS TWO HEADED ARROW" }, |
---|
1656 | { 0x21A2, "larrtl", "LEFTWARDS ARROW WITH TAIL" }, |
---|
1657 | { 0x21A3, "rarrtl", "RIGHTWARDS ARROW WITH TAIL" }, |
---|
1658 | { 0x21A6, "map", "RIGHTWARDS ARROW FROM BAR" }, |
---|
1659 | { 0x21A9, "larrhk", "LEFTWARDS ARROW WITH HOOK" }, |
---|
1660 | { 0x21AA, "rarrhk", "RIGHTWARDS ARROW WITH HOOK" }, |
---|
1661 | { 0x21AB, "larrlp", "LEFTWARDS ARROW WITH LOOP" }, |
---|
1662 | { 0x21AC, "rarrlp", "RIGHTWARDS ARROW WITH LOOP" }, |
---|
1663 | { 0x21AD, "harrw", "LEFT RIGHT WAVE ARROW" }, |
---|
1664 | { 0x21AE, "nharr", "LEFT RIGHT ARROW WITH STROKE" }, |
---|
1665 | { 0x21B0, "lsh", "UPWARDS ARROW WITH TIP LEFTWARDS" }, |
---|
1666 | { 0x21B1, "rsh", "UPWARDS ARROW WITH TIP RIGHTWARDS" }, |
---|
1667 | { 0x21B6, "cularr", "ANTICLOCKWISE TOP SEMICIRCLE ARROW" }, |
---|
1668 | { 0x21B7, "curarr", "CLOCKWISE TOP SEMICIRCLE ARROW" }, |
---|
1669 | { 0x21BA, "olarr", "ANTICLOCKWISE OPEN CIRCLE ARROW" }, |
---|
1670 | { 0x21BB, "orarr", "CLOCKWISE OPEN CIRCLE ARROW" }, |
---|
1671 | { 0x21BC, "lharu", "LEFTWARDS HARPOON WITH BARB UPWARDS" }, |
---|
1672 | { 0x21BD, "lhard", "LEFTWARDS HARPOON WITH BARB DOWNWARDS" }, |
---|
1673 | { 0x21BE, "uharr", "UPWARDS HARPOON WITH BARB RIGHTWARDS" }, |
---|
1674 | { 0x21BF, "uharl", "UPWARDS HARPOON WITH BARB LEFTWARDS" }, |
---|
1675 | { 0x21C0, "rharu", "RIGHTWARDS HARPOON WITH BARB UPWARDS" }, |
---|
1676 | { 0x21C1, "rhard", "RIGHTWARDS HARPOON WITH BARB DOWNWARDS" }, |
---|
1677 | { 0x21C2, "dharr", "DOWNWARDS HARPOON WITH BARB RIGHTWARDS" }, |
---|
1678 | { 0x21C3, "dharl", "DOWNWARDS HARPOON WITH BARB LEFTWARDS" }, |
---|
1679 | { 0x21C4, "rlarr2", "RIGHTWARDS ARROW OVER LEFTWARDS ARROW" }, |
---|
1680 | { 0x21C6, "lrarr2", "LEFTWARDS ARROW OVER RIGHTWARDS ARROW" }, |
---|
1681 | { 0x21C7, "larr2", "LEFTWARDS PAIRED ARROWS" }, |
---|
1682 | { 0x21C8, "uarr2", "UPWARDS PAIRED ARROWS" }, |
---|
1683 | { 0x21C9, "rarr2", "RIGHTWARDS PAIRED ARROWS" }, |
---|
1684 | { 0x21CA, "darr2", "DOWNWARDS PAIRED ARROWS" }, |
---|
1685 | { 0x21CB, "lrhar2", "LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON" }, |
---|
1686 | { 0x21CC, "rlhar2", "RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON" }, |
---|
1687 | { 0x21CD, "nlArr", "LEFTWARDS DOUBLE ARROW WITH STROKE" }, |
---|
1688 | { 0x21CE, "nhArr", "LEFT RIGHT DOUBLE ARROW WITH STROKE" }, |
---|
1689 | { 0x21CF, "nrArr", "RIGHTWARDS DOUBLE ARROW WITH STROKE" }, |
---|
1690 | { 0x21D0, "lArr", "LEFTWARDS ARROW" }, |
---|
1691 | { 0x21D0, "xlArr", "LEFTWARDS DOUBLE ARROW" }, |
---|
1692 | { 0x21D1, "uArr", "UPWARDS DOUBLE ARROW" }, |
---|
1693 | { 0x21D2, "rArr", "RIGHTWARDS ARROW" }, |
---|
1694 | { 0x21D2, "xrArr", "RIGHTWARDS DOUBLE ARROW" }, |
---|
1695 | { 0x21D3, "dArr", "DOWNWARDS DOUBLE ARROW" }, |
---|
1696 | { 0x21D4, "hArr", "" }, |
---|
1697 | { 0x21D4, "iff", "LEFT RIGHT DOUBLE ARROW" }, |
---|
1698 | { 0x21D5, "vArr", "UP DOWN DOUBLE ARROW" }, |
---|
1699 | { 0x21DA, "lAarr", "LEFTWARDS TRIPLE ARROW" }, |
---|
1700 | { 0x21DB, "rAarr", "RIGHTWARDS TRIPLE ARROW" }, |
---|
1701 | { 0x2200, "forall", "" }, |
---|
1702 | { 0x2201, "comp", "COMPLEMENT" }, |
---|
1703 | { 0x2202, "part", "" }, |
---|
1704 | { 0x2203, "exist", "" }, |
---|
1705 | { 0x2204, "nexist", "THERE DOES NOT EXIST" }, |
---|
1706 | { 0x2205, "empty", "" }, |
---|
1707 | { 0x2207, "nabla", "NABLA" }, |
---|
1708 | { 0x2209, "notin", "" }, |
---|
1709 | { 0x220A, "epsi", "" }, |
---|
1710 | { 0x220A, "epsis", "" }, |
---|
1711 | { 0x220A, "isin", "" }, |
---|
1712 | { 0x220D, "bepsi", "SMALL CONTAINS AS MEMBER" }, |
---|
1713 | { 0x220D, "ni", "" }, |
---|
1714 | { 0x220F, "prod", "N-ARY PRODUCT" }, |
---|
1715 | { 0x2210, "amalg", "N-ARY COPRODUCT" }, |
---|
1716 | { 0x2210, "coprod", "N-ARY COPRODUCT" }, |
---|
1717 | { 0x2210, "samalg", "" }, |
---|
1718 | { 0x2211, "sum", "N-ARY SUMMATION" }, |
---|
1719 | { 0x2212, "minus", "MINUS SIGN" }, |
---|
1720 | { 0x2213, "mnplus", "" }, |
---|
1721 | { 0x2214, "plusdo", "DOT PLUS" }, |
---|
1722 | { 0x2216, "setmn", "SET MINUS" }, |
---|
1723 | { 0x2216, "ssetmn", "SET MINUS" }, |
---|
1724 | { 0x2217, "lowast", "ASTERISK OPERATOR" }, |
---|
1725 | { 0x2218, "compfn", "RING OPERATOR" }, |
---|
1726 | { 0x221A, "radic", "" }, |
---|
1727 | { 0x221D, "prop", "" }, |
---|
1728 | { 0x221D, "vprop", "" }, |
---|
1729 | { 0x221E, "infin", "" }, |
---|
1730 | { 0x221F, "ang90", "RIGHT ANGLE" }, |
---|
1731 | { 0x2220, "ang", "ANGLE" }, |
---|
1732 | { 0x2221, "angmsd", "MEASURED ANGLE" }, |
---|
1733 | { 0x2222, "angsph", "" }, |
---|
1734 | { 0x2223, "mid", "" }, |
---|
1735 | { 0x2224, "nmid", "DOES NOT DIVIDE" }, |
---|
1736 | { 0x2225, "par", "PARALLEL TO" }, |
---|
1737 | { 0x2225, "spar", "PARALLEL TO" }, |
---|
1738 | { 0x2226, "npar", "NOT PARALLEL TO" }, |
---|
1739 | { 0x2226, "nspar", "NOT PARALLEL TO" }, |
---|
1740 | { 0x2227, "and", "" }, |
---|
1741 | { 0x2228, "or", "" }, |
---|
1742 | { 0x2229, "cap", "" }, |
---|
1743 | { 0x222A, "cup", "" }, |
---|
1744 | { 0x222B, "int", "" }, |
---|
1745 | { 0x222E, "conint", "" }, |
---|
1746 | { 0x2234, "there4", "" }, |
---|
1747 | { 0x2235, "becaus", "BECAUSE" }, |
---|
1748 | { 0x223C, "sim", "" }, |
---|
1749 | { 0x223C, "thksim", "TILDE OPERATOR" }, |
---|
1750 | { 0x223D, "bsim", "" }, |
---|
1751 | { 0x2240, "wreath", "WREATH PRODUCT" }, |
---|
1752 | { 0x2241, "nsim", "" }, |
---|
1753 | { 0x2243, "sime", "" }, |
---|
1754 | { 0x2244, "nsime", "" }, |
---|
1755 | { 0x2245, "cong", "" }, |
---|
1756 | { 0x2247, "ncong", "NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO" }, |
---|
1757 | { 0x2248, "ap", "" }, |
---|
1758 | { 0x2248, "thkap", "ALMOST EQUAL TO" }, |
---|
1759 | { 0x2249, "nap", "NOT ALMOST EQUAL TO" }, |
---|
1760 | { 0x224A, "ape", "" }, |
---|
1761 | { 0x224C, "bcong", "ALL EQUAL TO" }, |
---|
1762 | { 0x224D, "asymp", "EQUIVALENT TO" }, |
---|
1763 | { 0x224E, "bump", "" }, |
---|
1764 | { 0x224F, "bumpe", "" }, |
---|
1765 | { 0x2250, "esdot", "" }, |
---|
1766 | { 0x2251, "eDot", "" }, |
---|
1767 | { 0x2252, "efDot", "" }, |
---|
1768 | { 0x2253, "erDot", "" }, |
---|
1769 | { 0x2254, "colone", "" }, |
---|
1770 | { 0x2255, "ecolon", "" }, |
---|
1771 | { 0x2256, "ecir", "" }, |
---|
1772 | { 0x2257, "cire", "" }, |
---|
1773 | { 0x2259, "wedgeq", "ESTIMATES" }, |
---|
1774 | { 0x225C, "trie", "" }, |
---|
1775 | { 0x2260, "ne", "" }, |
---|
1776 | { 0x2261, "equiv", "" }, |
---|
1777 | { 0x2262, "nequiv", "NOT IDENTICAL TO" }, |
---|
1778 | { 0x2264, "le", "" }, |
---|
1779 | { 0x2264, "les", "LESS-THAN OR EQUAL TO" }, |
---|
1780 | { 0x2265, "ge", "GREATER-THAN OR EQUAL TO" }, |
---|
1781 | { 0x2265, "ges", "GREATER-THAN OR EQUAL TO" }, |
---|
1782 | { 0x2266, "lE", "" }, |
---|
1783 | { 0x2267, "gE", "" }, |
---|
1784 | { 0x2268, "lnE", "" }, |
---|
1785 | { 0x2268, "lne", "" }, |
---|
1786 | { 0x2268, "lvnE", "LESS-THAN BUT NOT EQUAL TO" }, |
---|
1787 | { 0x2269, "gnE", "" }, |
---|
1788 | { 0x2269, "gne", "" }, |
---|
1789 | { 0x2269, "gvnE", "GREATER-THAN BUT NOT EQUAL TO" }, |
---|
1790 | { 0x226A, "Lt", "MUCH LESS-THAN" }, |
---|
1791 | { 0x226B, "Gt", "MUCH GREATER-THAN" }, |
---|
1792 | { 0x226C, "twixt", "BETWEEN" }, |
---|
1793 | { 0x226E, "nlt", "NOT LESS-THAN" }, |
---|
1794 | { 0x226F, "ngt", "NOT GREATER-THAN" }, |
---|
1795 | { 0x2270, "nlE", "" }, |
---|
1796 | { 0x2270, "nle", "NEITHER LESS-THAN NOR EQUAL TO" }, |
---|
1797 | { 0x2270, "nles", "" }, |
---|
1798 | { 0x2271, "ngE", "" }, |
---|
1799 | { 0x2271, "nge", "NEITHER GREATER-THAN NOR EQUAL TO" }, |
---|
1800 | { 0x2271, "nges", "" }, |
---|
1801 | { 0x2272, "lap", "LESS-THAN OR EQUIVALENT TO" }, |
---|
1802 | { 0x2272, "lsim", "LESS-THAN OR EQUIVALENT TO" }, |
---|
1803 | { 0x2273, "gap", "GREATER-THAN OR EQUIVALENT TO" }, |
---|
1804 | { 0x2273, "gsim", "GREATER-THAN OR EQUIVALENT TO" }, |
---|
1805 | { 0x2276, "lg", "LESS-THAN OR GREATER-THAN" }, |
---|
1806 | { 0x2277, "gl", "" }, |
---|
1807 | { 0x227A, "pr", "" }, |
---|
1808 | { 0x227B, "sc", "" }, |
---|
1809 | { 0x227C, "cupre", "" }, |
---|
1810 | { 0x227C, "pre", "" }, |
---|
1811 | { 0x227D, "sccue", "" }, |
---|
1812 | { 0x227D, "sce", "" }, |
---|
1813 | { 0x227E, "prap", "" }, |
---|
1814 | { 0x227E, "prsim", "" }, |
---|
1815 | { 0x227F, "scap", "" }, |
---|
1816 | { 0x227F, "scsim", "" }, |
---|
1817 | { 0x2280, "npr", "DOES NOT PRECEDE" }, |
---|
1818 | { 0x2281, "nsc", "DOES NOT SUCCEED" }, |
---|
1819 | { 0x2282, "sub", "" }, |
---|
1820 | { 0x2283, "sup", "" }, |
---|
1821 | { 0x2284, "nsub", "NOT A SUBSET OF" }, |
---|
1822 | { 0x2285, "nsup", "NOT A SUPERSET OF" }, |
---|
1823 | { 0x2286, "subE", "" }, |
---|
1824 | { 0x2286, "sube", "" }, |
---|
1825 | { 0x2287, "supE", "" }, |
---|
1826 | { 0x2287, "supe", "" }, |
---|
1827 | { 0x2288, "nsubE", "" }, |
---|
1828 | { 0x2288, "nsube", "" }, |
---|
1829 | { 0x2289, "nsupE", "" }, |
---|
1830 | { 0x2289, "nsupe", "" }, |
---|
1831 | { 0x228A, "subne", "" }, |
---|
1832 | { 0x228A, "subnE", "SUBSET OF WITH NOT EQUAL TO" }, |
---|
1833 | { 0x228A, "vsubne", "SUBSET OF WITH NOT EQUAL TO" }, |
---|
1834 | { 0x228B, "supnE", "" }, |
---|
1835 | { 0x228B, "supne", "" }, |
---|
1836 | { 0x228B, "vsupnE", "SUPERSET OF WITH NOT EQUAL TO" }, |
---|
1837 | { 0x228B, "vsupne", "SUPERSET OF WITH NOT EQUAL TO" }, |
---|
1838 | { 0x228E, "uplus", "MULTISET UNION" }, |
---|
1839 | { 0x228F, "sqsub", "" }, |
---|
1840 | { 0x2290, "sqsup", "" }, |
---|
1841 | { 0x2291, "sqsube", "" }, |
---|
1842 | { 0x2292, "sqsupe", "" }, |
---|
1843 | { 0x2293, "sqcap", "SQUARE CAP" }, |
---|
1844 | { 0x2294, "sqcup", "SQUARE CUP" }, |
---|
1845 | { 0x2295, "oplus", "CIRCLED PLUS" }, |
---|
1846 | { 0x2296, "ominus", "CIRCLED MINUS" }, |
---|
1847 | { 0x2297, "otimes", "CIRCLED TIMES" }, |
---|
1848 | { 0x2298, "osol", "CIRCLED DIVISION SLASH" }, |
---|
1849 | { 0x2299, "odot", "CIRCLED DOT OPERATOR" }, |
---|
1850 | { 0x229A, "ocir", "CIRCLED RING OPERATOR" }, |
---|
1851 | { 0x229B, "oast", "CIRCLED ASTERISK OPERATOR" }, |
---|
1852 | { 0x229D, "odash", "CIRCLED DASH" }, |
---|
1853 | { 0x229E, "plusb", "SQUARED PLUS" }, |
---|
1854 | { 0x229F, "minusb", "SQUARED MINUS" }, |
---|
1855 | { 0x22A0, "timesb", "SQUARED TIMES" }, |
---|
1856 | { 0x22A1, "sdotb", "SQUARED DOT OPERATOR" }, |
---|
1857 | { 0x22A2, "vdash", "" }, |
---|
1858 | { 0x22A3, "dashv", "" }, |
---|
1859 | { 0x22A4, "top", "DOWN TACK" }, |
---|
1860 | { 0x22A5, "bottom", "" }, |
---|
1861 | { 0x22A5, "perp", "" }, |
---|
1862 | { 0x22A7, "models", "MODELS" }, |
---|
1863 | { 0x22A8, "vDash", "" }, |
---|
1864 | { 0x22A9, "Vdash", "" }, |
---|
1865 | { 0x22AA, "Vvdash", "" }, |
---|
1866 | { 0x22AC, "nvdash", "DOES NOT PROVE" }, |
---|
1867 | { 0x22AD, "nvDash", "NOT TRUE" }, |
---|
1868 | { 0x22AE, "nVdash", "DOES NOT FORCE" }, |
---|
1869 | { 0x22AF, "nVDash", "NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE" }, |
---|
1870 | { 0x22B2, "vltri", "" }, |
---|
1871 | { 0x22B3, "vrtri", "" }, |
---|
1872 | { 0x22B4, "ltrie", "" }, |
---|
1873 | { 0x22B5, "rtrie", "" }, |
---|
1874 | { 0x22B8, "mumap", "MULTIMAP" }, |
---|
1875 | { 0x22BA, "intcal", "INTERCALATE" }, |
---|
1876 | { 0x22BB, "veebar", "" }, |
---|
1877 | { 0x22BC, "barwed", "NAND" }, |
---|
1878 | { 0x22C4, "diam", "DIAMOND OPERATOR" }, |
---|
1879 | { 0x22C5, "sdot", "DOT OPERATOR" }, |
---|
1880 | { 0x22C6, "sstarf", "STAR OPERATOR" }, |
---|
1881 | { 0x22C6, "star", "STAR OPERATOR" }, |
---|
1882 | { 0x22C7, "divonx", "DIVISION TIMES" }, |
---|
1883 | { 0x22C8, "bowtie", "" }, |
---|
1884 | { 0x22C9, "ltimes", "LEFT NORMAL FACTOR SEMIDIRECT PRODUCT" }, |
---|
1885 | { 0x22CA, "rtimes", "RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT" }, |
---|
1886 | { 0x22CB, "lthree", "LEFT SEMIDIRECT PRODUCT" }, |
---|
1887 | { 0x22CC, "rthree", "RIGHT SEMIDIRECT PRODUCT" }, |
---|
1888 | { 0x22CD, "bsime", "" }, |
---|
1889 | { 0x22CE, "cuvee", "CURLY LOGICAL OR" }, |
---|
1890 | { 0x22CF, "cuwed", "CURLY LOGICAL AND" }, |
---|
1891 | { 0x22D0, "Sub", "" }, |
---|
1892 | { 0x22D1, "Sup", "" }, |
---|
1893 | { 0x22D2, "Cap", "DOUBLE INTERSECTION" }, |
---|
1894 | { 0x22D3, "Cup", "DOUBLE UNION" }, |
---|
1895 | { 0x22D4, "fork", "" }, |
---|
1896 | { 0x22D6, "ldot", "" }, |
---|
1897 | { 0x22D7, "gsdot", "" }, |
---|
1898 | { 0x22D8, "Ll", "" }, |
---|
1899 | { 0x22D9, "Gg", "VERY MUCH GREATER-THAN" }, |
---|
1900 | { 0x22DA, "lEg", "" }, |
---|
1901 | { 0x22DA, "leg", "" }, |
---|
1902 | { 0x22DB, "gEl", "" }, |
---|
1903 | { 0x22DB, "gel", "" }, |
---|
1904 | { 0x22DC, "els", "" }, |
---|
1905 | { 0x22DD, "egs", "" }, |
---|
1906 | { 0x22DE, "cuepr", "" }, |
---|
1907 | { 0x22DF, "cuesc", "" }, |
---|
1908 | { 0x22E0, "npre", "DOES NOT PRECEDE OR EQUAL" }, |
---|
1909 | { 0x22E1, "nsce", "DOES NOT SUCCEED OR EQUAL" }, |
---|
1910 | { 0x22E6, "lnsim", "" }, |
---|
1911 | { 0x22E7, "gnsim", "GREATER-THAN BUT NOT EQUIVALENT TO" }, |
---|
1912 | { 0x22E8, "prnap", "" }, |
---|
1913 | { 0x22E8, "prnsim", "" }, |
---|
1914 | { 0x22E9, "scnap", "" }, |
---|
1915 | { 0x22E9, "scnsim", "" }, |
---|
1916 | { 0x22EA, "nltri", "NOT NORMAL SUBGROUP OF" }, |
---|
1917 | { 0x22EB, "nrtri", "DOES NOT CONTAIN AS NORMAL SUBGROUP" }, |
---|
1918 | { 0x22EC, "nltrie", "NOT NORMAL SUBGROUP OF OR EQUAL TO" }, |
---|
1919 | { 0x22ED, "nrtrie", "DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL" }, |
---|
1920 | { 0x22EE, "vellip", "" }, |
---|
1921 | { 0x2306, "Barwed", "PERSPECTIVE" }, |
---|
1922 | { 0x2308, "lceil", "LEFT CEILING" }, |
---|
1923 | { 0x2309, "rceil", "RIGHT CEILING" }, |
---|
1924 | { 0x230A, "lfloor", "LEFT FLOOR" }, |
---|
1925 | { 0x230B, "rfloor", "RIGHT FLOOR" }, |
---|
1926 | { 0x230C, "drcrop", "BOTTOM RIGHT CROP" }, |
---|
1927 | { 0x230D, "dlcrop", "BOTTOM LEFT CROP" }, |
---|
1928 | { 0x230E, "urcrop", "TOP RIGHT CROP" }, |
---|
1929 | { 0x230F, "ulcrop", "TOP LEFT CROP" }, |
---|
1930 | { 0x2315, "telrec", "TELEPHONE RECORDER" }, |
---|
1931 | { 0x2316, "target", "POSITION INDICATOR" }, |
---|
1932 | { 0x231C, "ulcorn", "TOP LEFT CORNER" }, |
---|
1933 | { 0x231D, "urcorn", "TOP RIGHT CORNER" }, |
---|
1934 | { 0x231E, "dlcorn", "BOTTOM LEFT CORNER" }, |
---|
1935 | { 0x231F, "drcorn", "BOTTOM RIGHT CORNER" }, |
---|
1936 | { 0x2322, "frown", "" }, |
---|
1937 | { 0x2322, "sfrown", "FROWN" }, |
---|
1938 | { 0x2323, "smile", "" }, |
---|
1939 | { 0x2323, "ssmile", "SMILE" }, |
---|
1940 | { 0x2423, "blank", "OPEN BOX" }, |
---|
1941 | { 0x24C8, "oS", "CIRCLED LATIN CAPITAL LETTER S" }, |
---|
1942 | { 0x2500, "boxh", "BOX DRAWINGS LIGHT HORIZONTAL" }, |
---|
1943 | { 0x2502, "boxv", "BOX DRAWINGS LIGHT VERTICAL" }, |
---|
1944 | { 0x250C, "boxdr", "BOX DRAWINGS LIGHT DOWN AND RIGHT" }, |
---|
1945 | { 0x2510, "boxdl", "BOX DRAWINGS LIGHT DOWN AND LEFT" }, |
---|
1946 | { 0x2514, "boxur", "BOX DRAWINGS LIGHT UP AND RIGHT" }, |
---|
1947 | { 0x2518, "boxul", "BOX DRAWINGS LIGHT UP AND LEFT" }, |
---|
1948 | { 0x251C, "boxvr", "BOX DRAWINGS LIGHT VERTICAL AND RIGHT" }, |
---|
1949 | { 0x2524, "boxvl", "BOX DRAWINGS LIGHT VERTICAL AND LEFT" }, |
---|
1950 | { 0x252C, "boxhd", "BOX DRAWINGS LIGHT DOWN AND HORIZONTAL" }, |
---|
1951 | { 0x2534, "boxhu", "BOX DRAWINGS LIGHT UP AND HORIZONTAL" }, |
---|
1952 | { 0x253C, "boxvh", "BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL" }, |
---|
1953 | { 0x2550, "boxH", "BOX DRAWINGS DOUBLE HORIZONTAL" }, |
---|
1954 | { 0x2551, "boxV", "BOX DRAWINGS DOUBLE VERTICAL" }, |
---|
1955 | { 0x2552, "boxDR", "BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE" }, |
---|
1956 | { 0x2553, "boxDr", "BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE" }, |
---|
1957 | { 0x2554, "boxdR", "BOX DRAWINGS DOUBLE DOWN AND RIGHT" }, |
---|
1958 | { 0x2555, "boxDL", "BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE" }, |
---|
1959 | { 0x2556, "boxdL", "BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE" }, |
---|
1960 | { 0x2557, "boxDl", "BOX DRAWINGS DOUBLE DOWN AND LEFT" }, |
---|
1961 | { 0x2558, "boxUR", "BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE" }, |
---|
1962 | { 0x2559, "boxuR", "BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE" }, |
---|
1963 | { 0x255A, "boxUr", "BOX DRAWINGS DOUBLE UP AND RIGHT" }, |
---|
1964 | { 0x255B, "boxUL", "BOX DRAWINGS UP SINGLE AND LEFT DOUBLE" }, |
---|
1965 | { 0x255C, "boxUl", "BOX DRAWINGS UP DOUBLE AND LEFT SINGLE" }, |
---|
1966 | { 0x255D, "boxuL", "BOX DRAWINGS DOUBLE UP AND LEFT" }, |
---|
1967 | { 0x255E, "boxvR", "BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE" }, |
---|
1968 | { 0x255F, "boxVR", "BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE" }, |
---|
1969 | { 0x2560, "boxVr", "BOX DRAWINGS DOUBLE VERTICAL AND RIGHT" }, |
---|
1970 | { 0x2561, "boxvL", "BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE" }, |
---|
1971 | { 0x2562, "boxVL", "BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE" }, |
---|
1972 | { 0x2563, "boxVl", "BOX DRAWINGS DOUBLE VERTICAL AND LEFT" }, |
---|
1973 | { 0x2564, "boxhD", "BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE" }, |
---|
1974 | { 0x2565, "boxHD", "BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE" }, |
---|
1975 | { 0x2566, "boxHd", "BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL" }, |
---|
1976 | { 0x2567, "boxhU", "BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE" }, |
---|
1977 | { 0x2568, "boxHU", "BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE" }, |
---|
1978 | { 0x2569, "boxHu", "BOX DRAWINGS DOUBLE UP AND HORIZONTAL" }, |
---|
1979 | { 0x256A, "boxvH", "BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE" }, |
---|
1980 | { 0x256B, "boxVH", "BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE" }, |
---|
1981 | { 0x256C, "boxVh", "BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL" }, |
---|
1982 | { 0x2580, "uhblk", "UPPER HALF BLOCK" }, |
---|
1983 | { 0x2584, "lhblk", "LOWER HALF BLOCK" }, |
---|
1984 | { 0x2588, "block", "FULL BLOCK" }, |
---|
1985 | { 0x2591, "blk14", "LIGHT SHADE" }, |
---|
1986 | { 0x2592, "blk12", "MEDIUM SHADE" }, |
---|
1987 | { 0x2593, "blk34", "DARK SHADE" }, |
---|
1988 | { 0x25A1, "square", "WHITE SQUARE" }, |
---|
1989 | { 0x25A1, "squ", "WHITE SQUARE" }, |
---|
1990 | { 0x25AA, "squf", "" }, |
---|
1991 | { 0x25AD, "rect", "WHITE RECTANGLE" }, |
---|
1992 | { 0x25AE, "marker", "BLACK VERTICAL RECTANGLE" }, |
---|
1993 | { 0x25B3, "xutri", "WHITE UP-POINTING TRIANGLE" }, |
---|
1994 | { 0x25B4, "utrif", "BLACK UP-POINTING TRIANGLE" }, |
---|
1995 | { 0x25B5, "utri", "WHITE UP-POINTING TRIANGLE" }, |
---|
1996 | { 0x25B8, "rtrif", "BLACK RIGHT-POINTING TRIANGLE" }, |
---|
1997 | { 0x25B9, "rtri", "WHITE RIGHT-POINTING TRIANGLE" }, |
---|
1998 | { 0x25BD, "xdtri", "WHITE DOWN-POINTING TRIANGLE" }, |
---|
1999 | { 0x25BE, "dtrif", "BLACK DOWN-POINTING TRIANGLE" }, |
---|
2000 | { 0x25BF, "dtri", "WHITE DOWN-POINTING TRIANGLE" }, |
---|
2001 | { 0x25C2, "ltrif", "BLACK LEFT-POINTING TRIANGLE" }, |
---|
2002 | { 0x25C3, "ltri", "WHITE LEFT-POINTING TRIANGLE" }, |
---|
2003 | { 0x25CA, "loz", "LOZENGE" }, |
---|
2004 | { 0x25CB, "cir", "WHITE CIRCLE" }, |
---|
2005 | { 0x25CB, "xcirc", "WHITE CIRCLE" }, |
---|
2006 | { 0x2605, "starf", "BLACK STAR" }, |
---|
2007 | { 0x260E, "phone", "TELEPHONE SIGN" }, |
---|
2008 | { 0x2640, "female", "" }, |
---|
2009 | { 0x2642, "male", "MALE SIGN" }, |
---|
2010 | { 0x2660, "spades", "BLACK SPADE SUIT" }, |
---|
2011 | { 0x2663, "clubs", "BLACK CLUB SUIT" }, |
---|
2012 | { 0x2665, "hearts", "BLACK HEART SUIT" }, |
---|
2013 | { 0x2666, "diams", "BLACK DIAMOND SUIT" }, |
---|
2014 | { 0x2669, "sung", "" }, |
---|
2015 | { 0x266D, "flat", "MUSIC FLAT SIGN" }, |
---|
2016 | { 0x266E, "natur", "MUSIC NATURAL SIGN" }, |
---|
2017 | { 0x266F, "sharp", "MUSIC SHARP SIGN" }, |
---|
2018 | { 0x2713, "check", "CHECK MARK" }, |
---|
2019 | { 0x2717, "cross", "BALLOT X" }, |
---|
2020 | { 0x2720, "malt", "MALTESE CROSS" }, |
---|
2021 | { 0x2726, "lozf", "" }, |
---|
2022 | { 0x2736, "sext", "SIX POINTED BLACK STAR" }, |
---|
2023 | { 0x3008, "lang", "" }, |
---|
2024 | { 0x3009, "rang", "" }, |
---|
2025 | { 0xE291, "rpargt", "" }, |
---|
2026 | { 0xE2A2, "lnap", "" }, |
---|
2027 | { 0xE2AA, "nsmid", "" }, |
---|
2028 | { 0xE2B3, "prnE", "" }, |
---|
2029 | { 0xE2B5, "scnE", "" }, |
---|
2030 | { 0xE2B8, "vsubnE", "" }, |
---|
2031 | { 0xE301, "smid", "" }, |
---|
2032 | { 0xE411, "gnap", "" }, |
---|
2033 | { 0xFB00, "fflig", "" }, |
---|
2034 | { 0xFB01, "filig", "" }, |
---|
2035 | { 0xFB02, "fllig", "" }, |
---|
2036 | { 0xFB03, "ffilig", "" }, |
---|
2037 | { 0xFB04, "ffllig", "" }, |
---|
2038 | { 0xFE68, "sbsol", "SMALL REVERSE SOLIDUS" }, |
---|
2039 | }; |
---|
2040 | |
---|
2041 | /************************************************************************ |
---|
2042 | * * |
---|
2043 | * Commodity functions to handle entities * |
---|
2044 | * * |
---|
2045 | ************************************************************************/ |
---|
2046 | |
---|
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer to that many xmlChar.
 * It expands to a block that contains a return(NULL), so it can only be
 * used inside a function returning a pointer, with both `buffer' and
 * `buffer_size' variables in scope.
 *
 * The result of xmlRealloc() is first stored in a temporary: assigning it
 * straight to `buffer' would lose the only reference to the old block when
 * the reallocation fails. On failure the old block is released before
 * returning NULL (this assumes the enclosing function owns the buffer,
 * since nothing else can free it once that function has returned).
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *) xmlRealloc(buffer,                     \
                                   buffer##_size * sizeof(xmlChar));    \
    if (growBuffer_tmp == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
2058 | |
---|
2059 | /** |
---|
2060 | * docbEntityLookup: |
---|
2061 | * @name: the entity name |
---|
2062 | * |
---|
2063 | * Lookup the given entity in EntitiesTable |
---|
2064 | * |
---|
2065 | * TODO: the linear scan is really ugly, an hash table is really needed. |
---|
2066 | * |
---|
2067 | * Returns the associated docbEntityDescPtr if found, NULL otherwise. |
---|
2068 | */ |
---|
2069 | static docbEntityDescPtr |
---|
2070 | docbEntityLookup(const xmlChar *name) { |
---|
2071 | unsigned int i; |
---|
2072 | |
---|
2073 | for (i = 0;i < (sizeof(docbookEntitiesTable)/ |
---|
2074 | sizeof(docbookEntitiesTable[0]));i++) { |
---|
2075 | if (xmlStrEqual(name, BAD_CAST docbookEntitiesTable[i].name)) { |
---|
2076 | #ifdef DEBUG |
---|
2077 | xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name); |
---|
2078 | #endif |
---|
2079 | return(&docbookEntitiesTable[i]); |
---|
2080 | } |
---|
2081 | } |
---|
2082 | return(NULL); |
---|
2083 | } |
---|
2084 | |
---|
2085 | /** |
---|
2086 | * docbEntityValueLookup: |
---|
2087 | * @value: the entity's unicode value |
---|
2088 | * |
---|
2089 | * Lookup the given entity in EntitiesTable |
---|
2090 | * |
---|
2091 | * TODO: the linear scan is really ugly, an hash table is really needed. |
---|
2092 | * |
---|
2093 | * Returns the associated docbEntityDescPtr if found, NULL otherwise. |
---|
2094 | */ |
---|
2095 | static docbEntityDescPtr |
---|
2096 | docbEntityValueLookup(int value) { |
---|
2097 | unsigned int i; |
---|
2098 | #ifdef DEBUG |
---|
2099 | int lv = 0; |
---|
2100 | #endif |
---|
2101 | |
---|
2102 | for (i = 0;i < (sizeof(docbookEntitiesTable)/ |
---|
2103 | sizeof(docbookEntitiesTable[0]));i++) { |
---|
2104 | if (docbookEntitiesTable[i].value >= value) { |
---|
2105 | if (docbookEntitiesTable[i].value > value) |
---|
2106 | break; |
---|
2107 | #ifdef DEBUG |
---|
2108 | xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", docbookEntitiesTable[i].name); |
---|
2109 | #endif |
---|
2110 | return(&docbookEntitiesTable[i]); |
---|
2111 | } |
---|
2112 | #ifdef DEBUG |
---|
2113 | if (lv > docbookEntitiesTable[i].value) { |
---|
2114 | xmlGenericError(xmlGenericErrorContext, |
---|
2115 | "docbookEntitiesTable[] is not sorted (%d > %d)!\n", |
---|
2116 | lv, docbookEntitiesTable[i].value); |
---|
2117 | } |
---|
2118 | lv = docbookEntitiesTable[i].value; |
---|
2119 | #endif |
---|
2120 | } |
---|
2121 | return(NULL); |
---|
2122 | } |
---|
2123 | |
---|
#if 0
/**
 * UTF8ToSgml:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * plus SGML entities block of chars out. Characters below 0x80 are
 * copied as is, any other one is written as "&name;" using the entry
 * docbEntityValueLookup() returns for it.
 *
 * A NULL @in is treated as an initialization call: both lengths are set
 * to 0 and 0 is returned.
 *
 * Returns 0 if success (including when the conversion stops early because
 *     @out is nearly full or the last input sequence is incomplete), -2 if
 *     the transcoding fails (bad lead byte, or no entity for the character).
 * The value of @inlen after return is the number of input octets whose
 *     conversion was completed.
 * The value of @outlen after return is the number of octets written.
 */
int
UTF8ToSgml(unsigned char* out, int *outlen,
           const unsigned char* in, int *inlen) {
    const unsigned char *const dst_begin = out;
    const unsigned char *const src_begin = in;
    const unsigned char *committed = in;
    const unsigned char *dst_end;
    const unsigned char *src_end;
    unsigned int code, byte;
    int extra;
    int status = 0;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    src_end = in + (*inlen);
    dst_end = out + (*outlen);

    while (in < src_end) {
        byte = *in++;
        if (byte < 0x80) {
            code = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            break;
        } else if (byte < 0xE0) {
            code = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            code = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            code = byte & 0x07;
            extra = 3;
        } else {
            /* no chance for this in Ascii */
            status = -2;
            break;
        }

        /* the sequence is cut by the end of the input block */
        if (src_end - in < extra)
            break;

        /*
         * Fold in the continuation bytes; a byte which is not a
         * continuation one only ends the folding, it is not an error here.
         */
        while (extra != 0) {
            if (in >= src_end)
                break;
            byte = *in++;
            if ((byte & 0xC0) != 0x80)
                break;
            code = (code << 6) | (byte & 0x3F);
            extra--;
        }

        /* code now holds the decoded character value */
        if (code < 0x80) {
            if (out + 1 >= dst_end)
                break;
            *out++ = code;
        } else {
            docbEntityDescPtr ent;
            int len;

            /*
             * Try to lookup a predefined SGML entity for it
             */
            ent = docbEntityValueLookup(code);
            if (ent == NULL) {
                /* no chance for this in Ascii */
                status = -2;
                break;
            }
            len = strlen(ent->name);
            if (out + 2 + len >= dst_end)
                break;
            *out++ = '&';
            memcpy(out, ent->name, len);
            out += len;
            *out++ = ';';
        }
        committed = in;
    }

    /* common exit: report what was written and what was fully converted */
    *outlen = out - dst_begin;
    *inlen = committed - src_begin;
    return(status);
}
#endif
2225 | |
---|
2226 | /** |
---|
2227 | * docbEncodeEntities: |
---|
2228 | * @out: a pointer to an array of bytes to store the result |
---|
2229 | * @outlen: the length of @out |
---|
2230 | * @in: a pointer to an array of UTF-8 chars |
---|
2231 | * @inlen: the length of @in |
---|
2232 | * @quoteChar: the quote character to escape (' or ") or zero. |
---|
2233 | * |
---|
2234 | * Take a block of UTF-8 chars in and try to convert it to an ASCII |
---|
2235 | * plus SGML entities block of chars out. |
---|
2236 | * |
---|
2237 | * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise |
---|
2238 | * The value of @inlen after return is the number of octets consumed |
---|
2239 | * as the return value is positive, else unpredictable. |
---|
2240 | * The value of @outlen after return is the number of octets consumed. |
---|
2241 | */ |
---|
2242 | int |
---|
2243 | docbEncodeEntities(unsigned char* out, int *outlen, |
---|
2244 | const unsigned char* in, int *inlen, int quoteChar) { |
---|
2245 | const unsigned char* processed = in; |
---|
2246 | const unsigned char* outend = out + (*outlen); |
---|
2247 | const unsigned char* outstart = out; |
---|
2248 | const unsigned char* instart = in; |
---|
2249 | const unsigned char* inend = in + (*inlen); |
---|
2250 | unsigned int c, d; |
---|
2251 | int trailing; |
---|
2252 | |
---|
2253 | while (in < inend) { |
---|
2254 | d = *in++; |
---|
2255 | if (d < 0x80) { c= d; trailing= 0; } |
---|
2256 | else if (d < 0xC0) { |
---|
2257 | /* trailing byte in leading position */ |
---|
2258 | *outlen = out - outstart; |
---|
2259 | *inlen = processed - instart; |
---|
2260 | return(-2); |
---|
2261 | } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } |
---|
2262 | else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } |
---|
2263 | else if (d < 0xF8) { c= d & 0x07; trailing= 3; } |
---|
2264 | else { |
---|
2265 | /* no chance for this in Ascii */ |
---|
2266 | *outlen = out - outstart; |
---|
2267 | *inlen = processed - instart; |
---|
2268 | return(-2); |
---|
2269 | } |
---|
2270 | |
---|
2271 | if (inend - in < trailing) |
---|
2272 | break; |
---|
2273 | |
---|
2274 | while (trailing--) { |
---|
2275 | if (((d= *in++) & 0xC0) != 0x80) { |
---|
2276 | *outlen = out - outstart; |
---|
2277 | *inlen = processed - instart; |
---|
2278 | return(-2); |
---|
2279 | } |
---|
2280 | c <<= 6; |
---|
2281 | c |= d & 0x3F; |
---|
2282 | } |
---|
2283 | |
---|
2284 | /* assertion: c is a single UTF-4 value */ |
---|
2285 | if (c < 0x80 && c != (unsigned int) quoteChar && c != '&' && c != '<' && c != '>') { |
---|
2286 | if (out >= outend) |
---|
2287 | break; |
---|
2288 | *out++ = c; |
---|
2289 | } else { |
---|
2290 | docbEntityDescPtr ent; |
---|
2291 | const char *cp; |
---|
2292 | char nbuf[16]; |
---|
2293 | int len; |
---|
2294 | |
---|
2295 | /* |
---|
2296 | * Try to lookup a predefined SGML entity for it |
---|
2297 | */ |
---|
2298 | ent = docbEntityValueLookup(c); |
---|
2299 | if (ent == NULL) { |
---|
2300 | sprintf(nbuf, "#%u", c); |
---|
2301 | cp = nbuf; |
---|
2302 | } |
---|
2303 | else |
---|
2304 | cp = ent->name; |
---|
2305 | len = strlen(cp); |
---|
2306 | if (out + 2 + len > outend) |
---|
2307 | break; |
---|
2308 | *out++ = '&'; |
---|
2309 | memcpy(out, cp, len); |
---|
2310 | out += len; |
---|
2311 | *out++ = ';'; |
---|
2312 | } |
---|
2313 | processed = in; |
---|
2314 | } |
---|
2315 | *outlen = out - outstart; |
---|
2316 | *inlen = processed - instart; |
---|
2317 | return(0); |
---|
2318 | } |
---|
2319 | |
---|
2320 | |
---|
2321 | /************************************************************************ |
---|
2322 | * * |
---|
2323 | * Commodity functions to handle streams * |
---|
2324 | * * |
---|
2325 | ************************************************************************/ |
---|
2326 | |
---|
2327 | /** |
---|
2328 | * docbNewInputStream: |
---|
2329 | * @ctxt: an SGML parser context |
---|
2330 | * |
---|
2331 | * Create a new input stream structure |
---|
2332 | * Returns the new input stream or NULL |
---|
2333 | */ |
---|
2334 | static docbParserInputPtr |
---|
2335 | docbNewInputStream(docbParserCtxtPtr ctxt) { |
---|
2336 | docbParserInputPtr input; |
---|
2337 | |
---|
2338 | input = (xmlParserInputPtr) xmlMalloc(sizeof(docbParserInput)); |
---|
2339 | if (input == NULL) { |
---|
2340 | ctxt->errNo = XML_ERR_NO_MEMORY; |
---|
2341 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2342 | ctxt->sax->error(ctxt->userData, |
---|
2343 | "malloc: couldn't allocate a new input stream\n"); |
---|
2344 | return(NULL); |
---|
2345 | } |
---|
2346 | memset(input, 0, sizeof(docbParserInput)); |
---|
2347 | input->filename = NULL; |
---|
2348 | input->directory = NULL; |
---|
2349 | input->base = NULL; |
---|
2350 | input->cur = NULL; |
---|
2351 | input->buf = NULL; |
---|
2352 | input->line = 1; |
---|
2353 | input->col = 1; |
---|
2354 | input->buf = NULL; |
---|
2355 | input->free = NULL; |
---|
2356 | input->version = NULL; |
---|
2357 | input->consumed = 0; |
---|
2358 | input->length = 0; |
---|
2359 | return(input); |
---|
2360 | } |
---|
2361 | |
---|
2362 | |
---|
2363 | /************************************************************************ |
---|
2364 | * * |
---|
2365 | * Commodity functions, cleanup needed ? * |
---|
2366 | * * |
---|
2367 | ************************************************************************/ |
---|
2368 | |
---|
2369 | /** |
---|
2370 | * areBlanks: |
---|
2371 | * @ctxt: an SGML parser context |
---|
2372 | * @str: a xmlChar * |
---|
2373 | * @len: the size of @str |
---|
2374 | * |
---|
2375 | * Is this a sequence of blank chars that one can ignore ? |
---|
2376 | * |
---|
2377 | * Returns 1 if ignorable 0 otherwise. |
---|
2378 | */ |
---|
2379 | |
---|
2380 | static int areBlanks(docbParserCtxtPtr ctxt, const xmlChar *str, int len) { |
---|
2381 | int i; |
---|
2382 | xmlNodePtr lastChild; |
---|
2383 | |
---|
2384 | for (i = 0;i < len;i++) |
---|
2385 | if (!(IS_BLANK(str[i]))) return(0); |
---|
2386 | |
---|
2387 | if (CUR == 0) return(1); |
---|
2388 | if (CUR != '<') return(0); |
---|
2389 | if (ctxt->name == NULL) |
---|
2390 | return(1); |
---|
2391 | if (ctxt->node == NULL) return(0); |
---|
2392 | lastChild = xmlGetLastChild(ctxt->node); |
---|
2393 | if (lastChild == NULL) { |
---|
2394 | if ((ctxt->node->type != XML_ELEMENT_NODE) && |
---|
2395 | (ctxt->node->content != NULL)) return(0); |
---|
2396 | } else if (xmlNodeIsText(lastChild)) |
---|
2397 | return(0); |
---|
2398 | return(1); |
---|
2399 | } |
---|
2400 | |
---|
2401 | /************************************************************************ |
---|
2402 | * * |
---|
2403 | * External entities support * |
---|
2404 | * * |
---|
2405 | ************************************************************************/ |
---|
2406 | |
---|
2407 | /** |
---|
2408 | * docbParseCtxtExternalEntity: |
---|
2409 | * @ctx: the existing parsing context |
---|
2410 | * @URL: the URL for the entity to load |
---|
2411 | * @ID: the System ID for the entity to load |
---|
2412 | * @list: the return value for the set of parsed nodes |
---|
2413 | * |
---|
2414 | * Parse an external general entity within an existing parsing context |
---|
2415 | * |
---|
2416 | * Returns 0 if the entity is well formed, -1 in case of args problem and |
---|
2417 | * the parser error code otherwise |
---|
2418 | */ |
---|
2419 | |
---|
2420 | static int |
---|
2421 | docbParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL, |
---|
2422 | const xmlChar *ID, xmlNodePtr *list) { |
---|
2423 | xmlParserCtxtPtr ctxt; |
---|
2424 | xmlDocPtr newDoc; |
---|
2425 | xmlSAXHandlerPtr oldsax = NULL; |
---|
2426 | int ret = 0; |
---|
2427 | |
---|
2428 | if (ctx->depth > 40) { |
---|
2429 | return(XML_ERR_ENTITY_LOOP); |
---|
2430 | } |
---|
2431 | |
---|
2432 | if (list != NULL) |
---|
2433 | *list = NULL; |
---|
2434 | if ((URL == NULL) && (ID == NULL)) |
---|
2435 | return(-1); |
---|
2436 | if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */ |
---|
2437 | return(-1); |
---|
2438 | |
---|
2439 | |
---|
2440 | ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL); |
---|
2441 | if (ctxt == NULL) return(-1); |
---|
2442 | ctxt->userData = ctxt; |
---|
2443 | oldsax = ctxt->sax; |
---|
2444 | ctxt->sax = ctx->sax; |
---|
2445 | newDoc = xmlNewDoc(BAD_CAST "1.0"); |
---|
2446 | if (newDoc == NULL) { |
---|
2447 | xmlFreeParserCtxt(ctxt); |
---|
2448 | return(-1); |
---|
2449 | } |
---|
2450 | if (ctx->myDoc != NULL) { |
---|
2451 | newDoc->intSubset = ctx->myDoc->intSubset; |
---|
2452 | newDoc->extSubset = ctx->myDoc->extSubset; |
---|
2453 | } |
---|
2454 | if (ctx->myDoc->URL != NULL) { |
---|
2455 | newDoc->URL = xmlStrdup(ctx->myDoc->URL); |
---|
2456 | } |
---|
2457 | newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL); |
---|
2458 | if (newDoc->children == NULL) { |
---|
2459 | ctxt->sax = oldsax; |
---|
2460 | xmlFreeParserCtxt(ctxt); |
---|
2461 | newDoc->intSubset = NULL; |
---|
2462 | newDoc->extSubset = NULL; |
---|
2463 | xmlFreeDoc(newDoc); |
---|
2464 | return(-1); |
---|
2465 | } |
---|
2466 | nodePush(ctxt, newDoc->children); |
---|
2467 | if (ctx->myDoc == NULL) { |
---|
2468 | ctxt->myDoc = newDoc; |
---|
2469 | } else { |
---|
2470 | ctxt->myDoc = ctx->myDoc; |
---|
2471 | newDoc->children->doc = ctx->myDoc; |
---|
2472 | } |
---|
2473 | |
---|
2474 | /* |
---|
2475 | * Parse a possible text declaration first |
---|
2476 | */ |
---|
2477 | GROW; |
---|
2478 | if ((RAW == '<') && (NXT(1) == '?') && |
---|
2479 | (NXT(2) == 'x') && (NXT(3) == 'm') && |
---|
2480 | (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { |
---|
2481 | xmlParseTextDecl(ctxt); |
---|
2482 | } |
---|
2483 | |
---|
2484 | /* |
---|
2485 | * Doing validity checking on chunk doesn't make sense |
---|
2486 | */ |
---|
2487 | ctxt->instate = XML_PARSER_CONTENT; |
---|
2488 | ctxt->validate = ctx->validate; |
---|
2489 | ctxt->loadsubset = ctx->loadsubset; |
---|
2490 | ctxt->depth = ctx->depth + 1; |
---|
2491 | ctxt->replaceEntities = ctx->replaceEntities; |
---|
2492 | if (ctxt->validate) { |
---|
2493 | ctxt->vctxt.error = ctx->vctxt.error; |
---|
2494 | ctxt->vctxt.warning = ctx->vctxt.warning; |
---|
2495 | /* Allocate the Node stack */ |
---|
2496 | ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr)); |
---|
2497 | if (ctxt->vctxt.nodeTab == NULL) { |
---|
2498 | xmlGenericError(xmlGenericErrorContext, |
---|
2499 | "docbParseCtxtExternalEntity: out of memory\n"); |
---|
2500 | ctxt->validate = 0; |
---|
2501 | ctxt->vctxt.error = NULL; |
---|
2502 | ctxt->vctxt.warning = NULL; |
---|
2503 | } else { |
---|
2504 | ctxt->vctxt.nodeNr = 0; |
---|
2505 | ctxt->vctxt.nodeMax = 4; |
---|
2506 | ctxt->vctxt.node = NULL; |
---|
2507 | } |
---|
2508 | } else { |
---|
2509 | ctxt->vctxt.error = NULL; |
---|
2510 | ctxt->vctxt.warning = NULL; |
---|
2511 | } |
---|
2512 | |
---|
2513 | docbParseContent(ctxt); |
---|
2514 | |
---|
2515 | if ((RAW == '<') && (NXT(1) == '/')) { |
---|
2516 | ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; |
---|
2517 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2518 | ctxt->sax->error(ctxt->userData, |
---|
2519 | "chunk is not well balanced\n"); |
---|
2520 | ctxt->wellFormed = 0; |
---|
2521 | ctxt->disableSAX = 1; |
---|
2522 | } else if (RAW != 0) { |
---|
2523 | ctxt->errNo = XML_ERR_EXTRA_CONTENT; |
---|
2524 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2525 | ctxt->sax->error(ctxt->userData, |
---|
2526 | "extra content at the end of well balanced chunk\n"); |
---|
2527 | ctxt->wellFormed = 0; |
---|
2528 | ctxt->disableSAX = 1; |
---|
2529 | } |
---|
2530 | if (ctxt->node != newDoc->children) { |
---|
2531 | ctxt->errNo = XML_ERR_NOT_WELL_BALANCED; |
---|
2532 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2533 | ctxt->sax->error(ctxt->userData, |
---|
2534 | "chunk is not well balanced\n"); |
---|
2535 | ctxt->wellFormed = 0; |
---|
2536 | ctxt->disableSAX = 1; |
---|
2537 | } |
---|
2538 | |
---|
2539 | if (!ctxt->wellFormed) { |
---|
2540 | if (ctxt->errNo == 0) |
---|
2541 | ret = 1; |
---|
2542 | else |
---|
2543 | ret = ctxt->errNo; |
---|
2544 | } else { |
---|
2545 | if (list != NULL) { |
---|
2546 | xmlNodePtr cur; |
---|
2547 | |
---|
2548 | /* |
---|
2549 | * Return the newly created nodeset after unlinking it from |
---|
2550 | * they pseudo parent. |
---|
2551 | */ |
---|
2552 | cur = newDoc->children->children; |
---|
2553 | *list = cur; |
---|
2554 | while (cur != NULL) { |
---|
2555 | cur->parent = NULL; |
---|
2556 | cur = cur->next; |
---|
2557 | } |
---|
2558 | newDoc->children->children = NULL; |
---|
2559 | } |
---|
2560 | ret = 0; |
---|
2561 | } |
---|
2562 | ctxt->sax = oldsax; |
---|
2563 | xmlFreeParserCtxt(ctxt); |
---|
2564 | newDoc->intSubset = NULL; |
---|
2565 | newDoc->extSubset = NULL; |
---|
2566 | xmlFreeDoc(newDoc); |
---|
2567 | |
---|
2568 | return(ret); |
---|
2569 | } |
---|
2570 | |
---|
2571 | /************************************************************************ |
---|
2572 | * * |
---|
2573 | * The parser itself * |
---|
2574 | * * |
---|
2575 | ************************************************************************/ |
---|
2576 | |
---|
2577 | /** |
---|
2578 | * docbParseSGMLName: |
---|
2579 | * @ctxt: an SGML parser context |
---|
2580 | * |
---|
2581 | * parse an SGML tag or attribute name, note that we convert it to lowercase |
---|
2582 | * since SGML names are not case-sensitive. |
---|
2583 | * |
---|
2584 | * Returns the Tag Name parsed or NULL |
---|
2585 | */ |
---|
2586 | |
---|
2587 | static xmlChar * |
---|
2588 | docbParseSGMLName(docbParserCtxtPtr ctxt) { |
---|
2589 | xmlChar *ret = NULL; |
---|
2590 | int i = 0; |
---|
2591 | xmlChar loc[DOCB_PARSER_BUFFER_SIZE]; |
---|
2592 | |
---|
2593 | if (!IS_LETTER(CUR) && (CUR != '_') && |
---|
2594 | (CUR != ':')) return(NULL); |
---|
2595 | |
---|
2596 | while ((i < DOCB_PARSER_BUFFER_SIZE) && |
---|
2597 | ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || |
---|
2598 | (CUR == ':') || (CUR == '_'))) { |
---|
2599 | if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20; |
---|
2600 | else loc[i] = CUR; |
---|
2601 | i++; |
---|
2602 | |
---|
2603 | NEXT; |
---|
2604 | } |
---|
2605 | |
---|
2606 | ret = xmlStrndup(loc, i); |
---|
2607 | |
---|
2608 | return(ret); |
---|
2609 | } |
---|
2610 | |
---|
2611 | /** |
---|
2612 | * docbParseName: |
---|
2613 | * @ctxt: an SGML parser context |
---|
2614 | * |
---|
2615 | * parse an SGML name, this routine is case sensitive. |
---|
2616 | * |
---|
2617 | * Returns the Name parsed or NULL |
---|
2618 | */ |
---|
2619 | |
---|
2620 | static xmlChar * |
---|
2621 | docbParseName(docbParserCtxtPtr ctxt) { |
---|
2622 | xmlChar buf[DOCB_MAX_NAMELEN]; |
---|
2623 | int len = 0; |
---|
2624 | |
---|
2625 | GROW; |
---|
2626 | if (!IS_LETTER(CUR) && (CUR != '_')) { |
---|
2627 | return(NULL); |
---|
2628 | } |
---|
2629 | |
---|
2630 | while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || |
---|
2631 | (CUR == '.') || (CUR == '-') || |
---|
2632 | (CUR == '_') || (CUR == ':') || |
---|
2633 | (IS_COMBINING(CUR)) || |
---|
2634 | (IS_EXTENDER(CUR))) { |
---|
2635 | buf[len++] = CUR; |
---|
2636 | NEXT; |
---|
2637 | if (len >= DOCB_MAX_NAMELEN) { |
---|
2638 | xmlGenericError(xmlGenericErrorContext, |
---|
2639 | "docbParseName: reached DOCB_MAX_NAMELEN limit\n"); |
---|
2640 | while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || |
---|
2641 | (CUR == '.') || (CUR == '-') || |
---|
2642 | (CUR == '_') || (CUR == ':') || |
---|
2643 | (IS_COMBINING(CUR)) || |
---|
2644 | (IS_EXTENDER(CUR))) |
---|
2645 | NEXT; |
---|
2646 | break; |
---|
2647 | } |
---|
2648 | } |
---|
2649 | return(xmlStrndup(buf, len)); |
---|
2650 | } |
---|
2651 | |
---|
2652 | /** |
---|
2653 | * docbParseSGMLAttribute: |
---|
2654 | * @ctxt: an SGML parser context |
---|
2655 | * @stop: a char stop value |
---|
2656 | * |
---|
2657 | * parse an SGML attribute value till the stop (quote), if |
---|
2658 | * stop is 0 then it stops at the first space |
---|
2659 | * |
---|
2660 | * Returns the attribute parsed or NULL |
---|
2661 | */ |
---|
2662 | |
---|
2663 | static xmlChar * |
---|
2664 | docbParseSGMLAttribute(docbParserCtxtPtr ctxt, const xmlChar stop) { |
---|
2665 | xmlChar *buffer = NULL; |
---|
2666 | int buffer_size = 0; |
---|
2667 | xmlChar *out = NULL; |
---|
2668 | xmlChar *name = NULL; |
---|
2669 | |
---|
2670 | xmlChar *cur = NULL; |
---|
2671 | xmlEntityPtr xent; |
---|
2672 | docbEntityDescPtr ent; |
---|
2673 | |
---|
2674 | /* |
---|
2675 | * allocate a translation buffer. |
---|
2676 | */ |
---|
2677 | buffer_size = DOCB_PARSER_BIG_BUFFER_SIZE; |
---|
2678 | buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); |
---|
2679 | if (buffer == NULL) { |
---|
2680 | perror("docbParseSGMLAttribute: malloc failed"); |
---|
2681 | return(NULL); |
---|
2682 | } |
---|
2683 | out = buffer; |
---|
2684 | |
---|
2685 | /* |
---|
2686 | * Ok loop until we reach one of the ending chars |
---|
2687 | */ |
---|
2688 | while ((CUR != 0) && (CUR != stop) && (CUR != '>')) { |
---|
2689 | if ((stop == 0) && (IS_BLANK(CUR))) break; |
---|
2690 | if (CUR == '&') { |
---|
2691 | if (NXT(1) == '#') { |
---|
2692 | unsigned int c; |
---|
2693 | int bits; |
---|
2694 | |
---|
2695 | c = docbParseCharRef(ctxt); |
---|
2696 | if (c < 0x80) |
---|
2697 | { *out++ = c; bits= -6; } |
---|
2698 | else if (c < 0x800) |
---|
2699 | { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
---|
2700 | else if (c < 0x10000) |
---|
2701 | { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
---|
2702 | else |
---|
2703 | { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
---|
2704 | |
---|
2705 | for ( ; bits >= 0; bits-= 6) { |
---|
2706 | *out++ = ((c >> bits) & 0x3F) | 0x80; |
---|
2707 | } |
---|
2708 | } else { |
---|
2709 | xent = docbParseEntityRef(ctxt, &name); |
---|
2710 | if (name == NULL) { |
---|
2711 | *out++ = '&'; |
---|
2712 | if (out - buffer > buffer_size - 100) { |
---|
2713 | int indx = out - buffer; |
---|
2714 | |
---|
2715 | growBuffer(buffer); |
---|
2716 | out = &buffer[indx]; |
---|
2717 | } |
---|
2718 | *out++ = '&'; |
---|
2719 | } else { |
---|
2720 | ent = docbEntityLookup(name); |
---|
2721 | if (ent == NULL) { |
---|
2722 | *out++ = '&'; |
---|
2723 | cur = name; |
---|
2724 | while (*cur != 0) { |
---|
2725 | if (out - buffer > buffer_size - 100) { |
---|
2726 | int indx = out - buffer; |
---|
2727 | |
---|
2728 | growBuffer(buffer); |
---|
2729 | out = &buffer[indx]; |
---|
2730 | } |
---|
2731 | *out++ = *cur++; |
---|
2732 | } |
---|
2733 | xmlFree(name); |
---|
2734 | } else { |
---|
2735 | unsigned int c; |
---|
2736 | int bits; |
---|
2737 | |
---|
2738 | if (out - buffer > buffer_size - 100) { |
---|
2739 | int indx = out - buffer; |
---|
2740 | |
---|
2741 | growBuffer(buffer); |
---|
2742 | out = &buffer[indx]; |
---|
2743 | } |
---|
2744 | c = (xmlChar)ent->value; |
---|
2745 | if (c < 0x80) |
---|
2746 | { *out++ = c; bits= -6; } |
---|
2747 | else if (c < 0x800) |
---|
2748 | { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
---|
2749 | else if (c < 0x10000) |
---|
2750 | { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
---|
2751 | else |
---|
2752 | { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
---|
2753 | |
---|
2754 | for ( ; bits >= 0; bits-= 6) { |
---|
2755 | *out++ = ((c >> bits) & 0x3F) | 0x80; |
---|
2756 | } |
---|
2757 | xmlFree(name); |
---|
2758 | } |
---|
2759 | } |
---|
2760 | } |
---|
2761 | } else { |
---|
2762 | unsigned int c; |
---|
2763 | int bits; |
---|
2764 | |
---|
2765 | if (out - buffer > buffer_size - 100) { |
---|
2766 | int indx = out - buffer; |
---|
2767 | |
---|
2768 | growBuffer(buffer); |
---|
2769 | out = &buffer[indx]; |
---|
2770 | } |
---|
2771 | c = CUR; |
---|
2772 | if (c < 0x80) |
---|
2773 | { *out++ = c; bits= -6; } |
---|
2774 | else if (c < 0x800) |
---|
2775 | { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
---|
2776 | else if (c < 0x10000) |
---|
2777 | { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
---|
2778 | else |
---|
2779 | { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; } |
---|
2780 | |
---|
2781 | for ( ; bits >= 0; bits-= 6) { |
---|
2782 | *out++ = ((c >> bits) & 0x3F) | 0x80; |
---|
2783 | } |
---|
2784 | NEXT; |
---|
2785 | } |
---|
2786 | } |
---|
2787 | *out++ = 0; |
---|
2788 | return(buffer); |
---|
2789 | } |
---|
2790 | |
---|
2791 | |
---|
2792 | /** |
---|
2793 | * docbParseEntityRef: |
---|
2794 | * @ctxt: an SGML parser context |
---|
2795 | * @str: location to store the entity name |
---|
2796 | * |
---|
2797 | * parse an SGML ENTITY references |
---|
2798 | * |
---|
2799 | * [68] EntityRef ::= '&' Name ';' |
---|
2800 | * |
---|
2801 | * Returns the associated xmlEntityPtr if found, or NULL otherwise, |
---|
2802 | * if non-NULL *str will have to be freed by the caller. |
---|
2803 | */ |
---|
2804 | static xmlEntityPtr |
---|
2805 | docbParseEntityRef(docbParserCtxtPtr ctxt, xmlChar **str) { |
---|
2806 | xmlChar *name; |
---|
2807 | xmlEntityPtr ent = NULL; |
---|
2808 | *str = NULL; |
---|
2809 | |
---|
2810 | if (CUR == '&') { |
---|
2811 | NEXT; |
---|
2812 | name = docbParseName(ctxt); |
---|
2813 | if (name == NULL) { |
---|
2814 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2815 | ctxt->sax->error(ctxt->userData, |
---|
2816 | "docbParseEntityRef: no name\n"); |
---|
2817 | ctxt->wellFormed = 0; |
---|
2818 | } else { |
---|
2819 | GROW; |
---|
2820 | if (CUR == ';') { |
---|
2821 | *str = name; |
---|
2822 | |
---|
2823 | /* |
---|
2824 | * Ask first SAX for entity resolution, otherwise try the |
---|
2825 | * predefined set. |
---|
2826 | */ |
---|
2827 | if (ctxt->sax != NULL) { |
---|
2828 | if (ctxt->sax->getEntity != NULL) |
---|
2829 | ent = ctxt->sax->getEntity(ctxt->userData, name); |
---|
2830 | if (ent == NULL) |
---|
2831 | ent = xmlGetPredefinedEntity(name); |
---|
2832 | } |
---|
2833 | NEXT; |
---|
2834 | } else { |
---|
2835 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2836 | ctxt->sax->error(ctxt->userData, |
---|
2837 | "docbParseEntityRef: expecting ';'\n"); |
---|
2838 | *str = name; |
---|
2839 | } |
---|
2840 | } |
---|
2841 | } |
---|
2842 | return(ent); |
---|
2843 | } |
---|
2844 | |
---|
2845 | /** |
---|
2846 | * docbParseAttValue: |
---|
2847 | * @ctxt: an SGML parser context |
---|
2848 | * |
---|
2849 | * parse a value for an attribute |
---|
2850 | * Note: the parser won't do substitution of entities here, this |
---|
2851 | * will be handled later in xmlStringGetNodeList, unless it was |
---|
2852 | * asked for ctxt->replaceEntities != 0 |
---|
2853 | * |
---|
2854 | * Returns the AttValue parsed or NULL. |
---|
2855 | */ |
---|
2856 | |
---|
2857 | static xmlChar * |
---|
2858 | docbParseAttValue(docbParserCtxtPtr ctxt) { |
---|
2859 | xmlChar *ret = NULL; |
---|
2860 | |
---|
2861 | if (CUR == '"') { |
---|
2862 | NEXT; |
---|
2863 | ret = docbParseSGMLAttribute(ctxt, '"'); |
---|
2864 | if (CUR != '"') { |
---|
2865 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2866 | ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); |
---|
2867 | ctxt->wellFormed = 0; |
---|
2868 | } else |
---|
2869 | NEXT; |
---|
2870 | } else if (CUR == '\'') { |
---|
2871 | NEXT; |
---|
2872 | ret = docbParseSGMLAttribute(ctxt, '\''); |
---|
2873 | if (CUR != '\'') { |
---|
2874 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2875 | ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); |
---|
2876 | ctxt->wellFormed = 0; |
---|
2877 | } else |
---|
2878 | NEXT; |
---|
2879 | } else { |
---|
2880 | /* |
---|
2881 | * That's an SGMLism, the attribute value may not be quoted |
---|
2882 | */ |
---|
2883 | ret = docbParseSGMLAttribute(ctxt, 0); |
---|
2884 | if (ret == NULL) { |
---|
2885 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2886 | ctxt->sax->error(ctxt->userData, "AttValue: no value found\n"); |
---|
2887 | ctxt->wellFormed = 0; |
---|
2888 | } |
---|
2889 | } |
---|
2890 | return(ret); |
---|
2891 | } |
---|
2892 | |
---|
2893 | /** |
---|
2894 | * docbParseSystemLiteral: |
---|
2895 | * @ctxt: an SGML parser context |
---|
2896 | * |
---|
2897 | * parse an SGML Literal |
---|
2898 | * |
---|
2899 | * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
---|
2900 | * |
---|
2901 | * Returns the SystemLiteral parsed or NULL |
---|
2902 | */ |
---|
2903 | |
---|
2904 | static xmlChar * |
---|
2905 | docbParseSystemLiteral(docbParserCtxtPtr ctxt) { |
---|
2906 | const xmlChar *q; |
---|
2907 | xmlChar *ret = NULL; |
---|
2908 | |
---|
2909 | if (CUR == '"') { |
---|
2910 | NEXT; |
---|
2911 | q = CUR_PTR; |
---|
2912 | while ((IS_CHAR(CUR)) && (CUR != '"')) |
---|
2913 | NEXT; |
---|
2914 | if (!IS_CHAR(CUR)) { |
---|
2915 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2916 | ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); |
---|
2917 | ctxt->wellFormed = 0; |
---|
2918 | } else { |
---|
2919 | ret = xmlStrndup(q, CUR_PTR - q); |
---|
2920 | NEXT; |
---|
2921 | } |
---|
2922 | } else if (CUR == '\'') { |
---|
2923 | NEXT; |
---|
2924 | q = CUR_PTR; |
---|
2925 | while ((IS_CHAR(CUR)) && (CUR != '\'')) |
---|
2926 | NEXT; |
---|
2927 | if (!IS_CHAR(CUR)) { |
---|
2928 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2929 | ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); |
---|
2930 | ctxt->wellFormed = 0; |
---|
2931 | } else { |
---|
2932 | ret = xmlStrndup(q, CUR_PTR - q); |
---|
2933 | NEXT; |
---|
2934 | } |
---|
2935 | } else { |
---|
2936 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2937 | ctxt->sax->error(ctxt->userData, |
---|
2938 | "SystemLiteral \" or ' expected\n"); |
---|
2939 | ctxt->wellFormed = 0; |
---|
2940 | } |
---|
2941 | |
---|
2942 | return(ret); |
---|
2943 | } |
---|
2944 | |
---|
2945 | /** |
---|
2946 | * docbParsePubidLiteral: |
---|
2947 | * @ctxt: an SGML parser context |
---|
2948 | * |
---|
2949 | * parse an SGML public literal |
---|
2950 | * |
---|
2951 | * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
---|
2952 | * |
---|
2953 | * Returns the PubidLiteral parsed or NULL. |
---|
2954 | */ |
---|
2955 | |
---|
2956 | static xmlChar * |
---|
2957 | docbParsePubidLiteral(docbParserCtxtPtr ctxt) { |
---|
2958 | const xmlChar *q; |
---|
2959 | xmlChar *ret = NULL; |
---|
2960 | /* |
---|
2961 | * Name ::= (Letter | '_') (NameChar)* |
---|
2962 | */ |
---|
2963 | if (CUR == '"') { |
---|
2964 | NEXT; |
---|
2965 | q = CUR_PTR; |
---|
2966 | while (IS_PUBIDCHAR(CUR)) NEXT; |
---|
2967 | if (CUR != '"') { |
---|
2968 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2969 | ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); |
---|
2970 | ctxt->wellFormed = 0; |
---|
2971 | } else { |
---|
2972 | ret = xmlStrndup(q, CUR_PTR - q); |
---|
2973 | NEXT; |
---|
2974 | } |
---|
2975 | } else if (CUR == '\'') { |
---|
2976 | NEXT; |
---|
2977 | q = CUR_PTR; |
---|
2978 | while ((IS_LETTER(CUR)) && (CUR != '\'')) |
---|
2979 | NEXT; |
---|
2980 | if (!IS_LETTER(CUR)) { |
---|
2981 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2982 | ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); |
---|
2983 | ctxt->wellFormed = 0; |
---|
2984 | } else { |
---|
2985 | ret = xmlStrndup(q, CUR_PTR - q); |
---|
2986 | NEXT; |
---|
2987 | } |
---|
2988 | } else { |
---|
2989 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
2990 | ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); |
---|
2991 | ctxt->wellFormed = 0; |
---|
2992 | } |
---|
2993 | |
---|
2994 | return(ret); |
---|
2995 | } |
---|
2996 | |
---|
2997 | /** |
---|
2998 | * docbParseCharData: |
---|
2999 | * @ctxt: an SGML parser context |
---|
3000 | * @cdata: int indicating whether we are within a CDATA section |
---|
3001 | * |
---|
3002 | * parse a CharData section. |
---|
3003 | * if we are within a CDATA section ']]>' marks an end of section. |
---|
3004 | * |
---|
3005 | * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
---|
3006 | */ |
---|
3007 | |
---|
3008 | static void |
---|
3009 | docbParseCharData(docbParserCtxtPtr ctxt) { |
---|
3010 | xmlChar buf[DOCB_PARSER_BIG_BUFFER_SIZE + 5]; |
---|
3011 | int nbchar = 0; |
---|
3012 | int cur, l; |
---|
3013 | |
---|
3014 | SHRINK; |
---|
3015 | cur = CUR_CHAR(l); |
---|
3016 | while (((cur != '<') || (ctxt->token == '<')) && |
---|
3017 | ((cur != '&') || (ctxt->token == '&')) && |
---|
3018 | (IS_CHAR(cur))) { |
---|
3019 | COPY_BUF(l,buf,nbchar,cur); |
---|
3020 | if (nbchar >= DOCB_PARSER_BIG_BUFFER_SIZE) { |
---|
3021 | /* |
---|
3022 | * Ok the segment is to be consumed as chars. |
---|
3023 | */ |
---|
3024 | if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
---|
3025 | if (areBlanks(ctxt, buf, nbchar)) { |
---|
3026 | if (ctxt->sax->ignorableWhitespace != NULL) |
---|
3027 | ctxt->sax->ignorableWhitespace(ctxt->userData, |
---|
3028 | buf, nbchar); |
---|
3029 | } else { |
---|
3030 | if (ctxt->sax->characters != NULL) |
---|
3031 | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
---|
3032 | } |
---|
3033 | } |
---|
3034 | nbchar = 0; |
---|
3035 | } |
---|
3036 | NEXTL(l); |
---|
3037 | cur = CUR_CHAR(l); |
---|
3038 | } |
---|
3039 | if (nbchar != 0) { |
---|
3040 | /* |
---|
3041 | * Ok the segment is to be consumed as chars. |
---|
3042 | */ |
---|
3043 | if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
---|
3044 | if (areBlanks(ctxt, buf, nbchar)) { |
---|
3045 | if (ctxt->sax->ignorableWhitespace != NULL) |
---|
3046 | ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); |
---|
3047 | } else { |
---|
3048 | if (ctxt->sax->characters != NULL) |
---|
3049 | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
---|
3050 | } |
---|
3051 | } |
---|
3052 | } |
---|
3053 | } |
---|
3054 | |
---|
3055 | /** |
---|
3056 | * docbParseExternalID: |
---|
3057 | * @ctxt: an SGML parser context |
---|
3058 | * @publicID: a xmlChar** receiving PubidLiteral |
---|
3059 | * |
---|
3060 | * Parse an External ID or a Public ID |
---|
3061 | * |
---|
3062 | * Returns the function returns SystemLiteral and in the second |
---|
3063 | * case publicID receives PubidLiteral, |
---|
3064 | * it is possible to return NULL and have publicID set. |
---|
3065 | */ |
---|
3066 | |
---|
3067 | static xmlChar * |
---|
3068 | docbParseExternalID(docbParserCtxtPtr ctxt, xmlChar **publicID) { |
---|
3069 | xmlChar *URI = NULL; |
---|
3070 | |
---|
3071 | if ((UPPER == 'S') && (UPP(1) == 'Y') && |
---|
3072 | (UPP(2) == 'S') && (UPP(3) == 'T') && |
---|
3073 | (UPP(4) == 'E') && (UPP(5) == 'M')) { |
---|
3074 | SKIP(6); |
---|
3075 | if (!IS_BLANK(CUR)) { |
---|
3076 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3077 | ctxt->sax->error(ctxt->userData, |
---|
3078 | "Space required after 'SYSTEM'\n"); |
---|
3079 | ctxt->wellFormed = 0; |
---|
3080 | } |
---|
3081 | SKIP_BLANKS; |
---|
3082 | URI = docbParseSystemLiteral(ctxt); |
---|
3083 | if (URI == NULL) { |
---|
3084 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3085 | ctxt->sax->error(ctxt->userData, |
---|
3086 | "docbParseExternalID: SYSTEM, no URI\n"); |
---|
3087 | ctxt->wellFormed = 0; |
---|
3088 | } |
---|
3089 | } else if ((UPPER == 'P') && (UPP(1) == 'U') && |
---|
3090 | (UPP(2) == 'B') && (UPP(3) == 'L') && |
---|
3091 | (UPP(4) == 'I') && (UPP(5) == 'C')) { |
---|
3092 | SKIP(6); |
---|
3093 | if (!IS_BLANK(CUR)) { |
---|
3094 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3095 | ctxt->sax->error(ctxt->userData, |
---|
3096 | "Space required after 'PUBLIC'\n"); |
---|
3097 | ctxt->wellFormed = 0; |
---|
3098 | } |
---|
3099 | SKIP_BLANKS; |
---|
3100 | *publicID = docbParsePubidLiteral(ctxt); |
---|
3101 | if (*publicID == NULL) { |
---|
3102 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3103 | ctxt->sax->error(ctxt->userData, |
---|
3104 | "docbParseExternalID: PUBLIC, no Public Identifier\n"); |
---|
3105 | ctxt->wellFormed = 0; |
---|
3106 | } |
---|
3107 | SKIP_BLANKS; |
---|
3108 | if ((CUR == '"') || (CUR == '\'')) { |
---|
3109 | URI = docbParseSystemLiteral(ctxt); |
---|
3110 | } |
---|
3111 | } |
---|
3112 | return(URI); |
---|
3113 | } |
---|
3114 | |
---|
3115 | /** |
---|
3116 | * docbParsePI: |
---|
3117 | * @ctxt: an XML parser context |
---|
3118 | * |
---|
3119 | * parse an XML Processing Instruction. |
---|
3120 | * |
---|
3121 | * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' |
---|
3122 | * |
---|
3123 | * The processing is transfered to SAX once parsed. |
---|
3124 | */ |
---|
3125 | |
---|
3126 | static void |
---|
3127 | docbParsePI(xmlParserCtxtPtr ctxt) { |
---|
3128 | xmlChar *buf = NULL; |
---|
3129 | int len = 0; |
---|
3130 | int size = DOCB_PARSER_BUFFER_SIZE; |
---|
3131 | int cur, l; |
---|
3132 | xmlChar *target; |
---|
3133 | xmlParserInputState state; |
---|
3134 | int count = 0; |
---|
3135 | |
---|
3136 | if ((RAW == '<') && (NXT(1) == '?')) { |
---|
3137 | xmlParserInputPtr input = ctxt->input; |
---|
3138 | state = ctxt->instate; |
---|
3139 | ctxt->instate = XML_PARSER_PI; |
---|
3140 | /* |
---|
3141 | * this is a Processing Instruction. |
---|
3142 | */ |
---|
3143 | SKIP(2); |
---|
3144 | SHRINK; |
---|
3145 | |
---|
3146 | /* |
---|
3147 | * Parse the target name and check for special support like |
---|
3148 | * namespace. |
---|
3149 | */ |
---|
3150 | target = xmlParseName(ctxt); |
---|
3151 | if (target != NULL) { |
---|
3152 | xmlChar *encoding = NULL; |
---|
3153 | |
---|
3154 | if ((RAW == '?') && (NXT(1) == '>')) { |
---|
3155 | if (input != ctxt->input) { |
---|
3156 | ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; |
---|
3157 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3158 | ctxt->sax->error(ctxt->userData, |
---|
3159 | "PI declaration doesn't start and stop in the same entity\n"); |
---|
3160 | ctxt->wellFormed = 0; |
---|
3161 | ctxt->disableSAX = 1; |
---|
3162 | } |
---|
3163 | SKIP(2); |
---|
3164 | |
---|
3165 | /* |
---|
3166 | * SAX: PI detected. |
---|
3167 | */ |
---|
3168 | if ((ctxt->sax) && (!ctxt->disableSAX) && |
---|
3169 | (ctxt->sax->processingInstruction != NULL)) |
---|
3170 | ctxt->sax->processingInstruction(ctxt->userData, |
---|
3171 | target, NULL); |
---|
3172 | ctxt->instate = state; |
---|
3173 | xmlFree(target); |
---|
3174 | return; |
---|
3175 | } |
---|
3176 | if (xmlStrEqual(target, BAD_CAST "sgml-declaration")) { |
---|
3177 | |
---|
3178 | encoding = xmlParseEncodingDecl(ctxt); |
---|
3179 | if (encoding == NULL) { |
---|
3180 | xmlGenericError(xmlGenericErrorContext, |
---|
3181 | "sgml-declaration: failed to find/handle encoding\n"); |
---|
3182 | #ifdef DEBUG |
---|
3183 | } else { |
---|
3184 | xmlGenericError(xmlGenericErrorContext, |
---|
3185 | "switched to encoding %s\n", encoding); |
---|
3186 | #endif |
---|
3187 | } |
---|
3188 | |
---|
3189 | } |
---|
3190 | buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
---|
3191 | if (buf == NULL) { |
---|
3192 | xmlGenericError(xmlGenericErrorContext, |
---|
3193 | "malloc of %d byte failed\n", size); |
---|
3194 | ctxt->instate = state; |
---|
3195 | return; |
---|
3196 | } |
---|
3197 | cur = CUR; |
---|
3198 | if (encoding != NULL) { |
---|
3199 | len = snprintf((char *) buf, size - 1, |
---|
3200 | " encoding = \"%s\"", encoding); |
---|
3201 | if (len < 0) |
---|
3202 | len = size; |
---|
3203 | } else { |
---|
3204 | if (!IS_BLANK(cur)) { |
---|
3205 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
3206 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3207 | ctxt->sax->error(ctxt->userData, |
---|
3208 | "docbParsePI: PI %s space expected\n", target); |
---|
3209 | ctxt->wellFormed = 0; |
---|
3210 | ctxt->disableSAX = 1; |
---|
3211 | } |
---|
3212 | SKIP_BLANKS; |
---|
3213 | } |
---|
3214 | cur = CUR_CHAR(l); |
---|
3215 | while (IS_CHAR(cur) && /* checked */ |
---|
3216 | ((cur != '?') || (NXT(1) != '>'))) { |
---|
3217 | if (len + 5 >= size) { |
---|
3218 | size *= 2; |
---|
3219 | buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
---|
3220 | if (buf == NULL) { |
---|
3221 | xmlGenericError(xmlGenericErrorContext, |
---|
3222 | "realloc of %d byte failed\n", size); |
---|
3223 | ctxt->instate = state; |
---|
3224 | return; |
---|
3225 | } |
---|
3226 | } |
---|
3227 | count++; |
---|
3228 | if (count > 50) { |
---|
3229 | GROW; |
---|
3230 | count = 0; |
---|
3231 | } |
---|
3232 | COPY_BUF(l,buf,len,cur); |
---|
3233 | NEXTL(l); |
---|
3234 | cur = CUR_CHAR(l); |
---|
3235 | if (cur == 0) { |
---|
3236 | SHRINK; |
---|
3237 | GROW; |
---|
3238 | cur = CUR_CHAR(l); |
---|
3239 | } |
---|
3240 | } |
---|
3241 | buf[len] = 0; |
---|
3242 | if (cur != '?') { |
---|
3243 | ctxt->errNo = XML_ERR_PI_NOT_FINISHED; |
---|
3244 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3245 | ctxt->sax->error(ctxt->userData, |
---|
3246 | "docbParsePI: PI %s never end ...\n", target); |
---|
3247 | ctxt->wellFormed = 0; |
---|
3248 | ctxt->disableSAX = 1; |
---|
3249 | } else { |
---|
3250 | if (input != ctxt->input) { |
---|
3251 | ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; |
---|
3252 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3253 | ctxt->sax->error(ctxt->userData, |
---|
3254 | "PI declaration doesn't start and stop in the same entity\n"); |
---|
3255 | ctxt->wellFormed = 0; |
---|
3256 | ctxt->disableSAX = 1; |
---|
3257 | } |
---|
3258 | SKIP(2); |
---|
3259 | |
---|
3260 | /* |
---|
3261 | * SAX: PI detected. |
---|
3262 | */ |
---|
3263 | if ((ctxt->sax) && (!ctxt->disableSAX) && |
---|
3264 | (ctxt->sax->processingInstruction != NULL)) |
---|
3265 | ctxt->sax->processingInstruction(ctxt->userData, |
---|
3266 | target, buf); |
---|
3267 | } |
---|
3268 | xmlFree(buf); |
---|
3269 | xmlFree(target); |
---|
3270 | } else { |
---|
3271 | ctxt->errNo = XML_ERR_PI_NOT_STARTED; |
---|
3272 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3273 | ctxt->sax->error(ctxt->userData, |
---|
3274 | "docbParsePI : no target name\n"); |
---|
3275 | ctxt->wellFormed = 0; |
---|
3276 | ctxt->disableSAX = 1; |
---|
3277 | } |
---|
3278 | ctxt->instate = state; |
---|
3279 | } |
---|
3280 | } |
---|
3281 | |
---|
3282 | /** |
---|
3283 | * docbParseComment: |
---|
3284 | * @ctxt: an SGML parser context |
---|
3285 | * |
---|
3286 | * Parse an XML (SGML) comment <!-- .... --> |
---|
3287 | * |
---|
3288 | * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
---|
3289 | */ |
---|
3290 | static void |
---|
3291 | docbParseComment(docbParserCtxtPtr ctxt) { |
---|
3292 | xmlChar *buf = NULL; |
---|
3293 | int len; |
---|
3294 | int size = DOCB_PARSER_BUFFER_SIZE; |
---|
3295 | int q, ql; |
---|
3296 | int r, rl; |
---|
3297 | int cur, l; |
---|
3298 | xmlParserInputState state; |
---|
3299 | |
---|
3300 | /* |
---|
3301 | * Check that there is a comment right here. |
---|
3302 | */ |
---|
3303 | if ((RAW != '<') || (NXT(1) != '!') || |
---|
3304 | (NXT(2) != '-') || (NXT(3) != '-')) return; |
---|
3305 | |
---|
3306 | state = ctxt->instate; |
---|
3307 | ctxt->instate = XML_PARSER_COMMENT; |
---|
3308 | SHRINK; |
---|
3309 | SKIP(4); |
---|
3310 | buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
---|
3311 | if (buf == NULL) { |
---|
3312 | xmlGenericError(xmlGenericErrorContext, |
---|
3313 | "malloc of %d byte failed\n", size); |
---|
3314 | ctxt->instate = state; |
---|
3315 | return; |
---|
3316 | } |
---|
3317 | q = CUR_CHAR(ql); |
---|
3318 | NEXTL(ql); |
---|
3319 | r = CUR_CHAR(rl); |
---|
3320 | NEXTL(rl); |
---|
3321 | cur = CUR_CHAR(l); |
---|
3322 | len = 0; |
---|
3323 | while (IS_CHAR(cur) && |
---|
3324 | ((cur != '>') || |
---|
3325 | (r != '-') || (q != '-'))) { |
---|
3326 | if (len + 5 >= size) { |
---|
3327 | size *= 2; |
---|
3328 | buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
---|
3329 | if (buf == NULL) { |
---|
3330 | xmlGenericError(xmlGenericErrorContext, |
---|
3331 | "realloc of %d byte failed\n", size); |
---|
3332 | ctxt->instate = state; |
---|
3333 | return; |
---|
3334 | } |
---|
3335 | } |
---|
3336 | COPY_BUF(ql,buf,len,q); |
---|
3337 | q = r; |
---|
3338 | ql = rl; |
---|
3339 | r = cur; |
---|
3340 | rl = l; |
---|
3341 | NEXTL(l); |
---|
3342 | cur = CUR_CHAR(l); |
---|
3343 | if (cur == 0) { |
---|
3344 | SHRINK; |
---|
3345 | GROW; |
---|
3346 | cur = CUR_CHAR(l); |
---|
3347 | } |
---|
3348 | } |
---|
3349 | buf[len] = 0; |
---|
3350 | if (!IS_CHAR(cur)) { |
---|
3351 | ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; |
---|
3352 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3353 | ctxt->sax->error(ctxt->userData, |
---|
3354 | "Comment not terminated \n<!--%.50s\n", buf); |
---|
3355 | ctxt->wellFormed = 0; |
---|
3356 | xmlFree(buf); |
---|
3357 | } else { |
---|
3358 | NEXT; |
---|
3359 | if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && |
---|
3360 | (!ctxt->disableSAX)) |
---|
3361 | ctxt->sax->comment(ctxt->userData, buf); |
---|
3362 | xmlFree(buf); |
---|
3363 | } |
---|
3364 | ctxt->instate = state; |
---|
3365 | } |
---|
3366 | |
---|
3367 | /** |
---|
3368 | * docbParseCharRef: |
---|
3369 | * @ctxt: an SGML parser context |
---|
3370 | * |
---|
3371 | * parse Reference declarations |
---|
3372 | * |
---|
3373 | * [66] CharRef ::= '&#' [0-9]+ ';' | |
---|
3374 | * '&#x' [0-9a-fA-F]+ ';' |
---|
3375 | * |
---|
3376 | * Returns the value parsed (as an int) |
---|
3377 | */ |
---|
3378 | static int |
---|
3379 | docbParseCharRef(docbParserCtxtPtr ctxt) { |
---|
3380 | int val = 0; |
---|
3381 | |
---|
3382 | if ((CUR == '&') && (NXT(1) == '#') && |
---|
3383 | (NXT(2) == 'x')) { |
---|
3384 | SKIP(3); |
---|
3385 | while (CUR != ';') { |
---|
3386 | if ((CUR >= '0') && (CUR <= '9')) |
---|
3387 | val = val * 16 + (CUR - '0'); |
---|
3388 | else if ((CUR >= 'a') && (CUR <= 'f')) |
---|
3389 | val = val * 16 + (CUR - 'a') + 10; |
---|
3390 | else if ((CUR >= 'A') && (CUR <= 'F')) |
---|
3391 | val = val * 16 + (CUR - 'A') + 10; |
---|
3392 | else { |
---|
3393 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3394 | ctxt->sax->error(ctxt->userData, |
---|
3395 | "docbParseCharRef: invalid hexadecimal value\n"); |
---|
3396 | ctxt->wellFormed = 0; |
---|
3397 | val = 0; |
---|
3398 | break; |
---|
3399 | } |
---|
3400 | NEXT; |
---|
3401 | } |
---|
3402 | if (CUR == ';') |
---|
3403 | NEXT; |
---|
3404 | } else if ((CUR == '&') && (NXT(1) == '#')) { |
---|
3405 | SKIP(2); |
---|
3406 | while (CUR != ';') { |
---|
3407 | if ((CUR >= '0') && (CUR <= '9')) |
---|
3408 | val = val * 10 + (CUR - '0'); |
---|
3409 | else { |
---|
3410 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3411 | ctxt->sax->error(ctxt->userData, |
---|
3412 | "docbParseCharRef: invalid decimal value\n"); |
---|
3413 | ctxt->wellFormed = 0; |
---|
3414 | val = 0; |
---|
3415 | break; |
---|
3416 | } |
---|
3417 | NEXT; |
---|
3418 | } |
---|
3419 | if (CUR == ';') |
---|
3420 | NEXT; |
---|
3421 | } else { |
---|
3422 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3423 | ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid value\n"); |
---|
3424 | ctxt->wellFormed = 0; |
---|
3425 | } |
---|
3426 | /* |
---|
3427 | * Check the value IS_CHAR ... |
---|
3428 | */ |
---|
3429 | if (IS_CHAR(val)) { |
---|
3430 | return(val); |
---|
3431 | } else { |
---|
3432 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3433 | ctxt->sax->error(ctxt->userData, "docbParseCharRef: invalid xmlChar value %d\n", |
---|
3434 | val); |
---|
3435 | ctxt->wellFormed = 0; |
---|
3436 | } |
---|
3437 | return(0); |
---|
3438 | } |
---|
3439 | |
---|
3440 | |
---|
3441 | /** |
---|
3442 | * docbParseDocTypeDecl : |
---|
3443 | * @ctxt: an SGML parser context |
---|
3444 | * |
---|
3445 | * parse a DOCTYPE declaration |
---|
3446 | * |
---|
3447 | * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? |
---|
3448 | * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' |
---|
3449 | */ |
---|
3450 | |
---|
3451 | static void |
---|
3452 | docbParseDocTypeDecl(docbParserCtxtPtr ctxt) { |
---|
3453 | xmlChar *name; |
---|
3454 | xmlChar *ExternalID = NULL; |
---|
3455 | xmlChar *URI = NULL; |
---|
3456 | |
---|
3457 | /* |
---|
3458 | * We know that '<!DOCTYPE' has been detected. |
---|
3459 | */ |
---|
3460 | SKIP(9); |
---|
3461 | |
---|
3462 | SKIP_BLANKS; |
---|
3463 | |
---|
3464 | /* |
---|
3465 | * Parse the DOCTYPE name. |
---|
3466 | */ |
---|
3467 | name = docbParseName(ctxt); |
---|
3468 | if (name == NULL) { |
---|
3469 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3470 | ctxt->sax->error(ctxt->userData, "docbParseDocTypeDecl : no DOCTYPE name !\n"); |
---|
3471 | ctxt->wellFormed = 0; |
---|
3472 | } |
---|
3473 | /* |
---|
3474 | * Check that upper(name) == "SGML" !!!!!!!!!!!!! |
---|
3475 | */ |
---|
3476 | |
---|
3477 | SKIP_BLANKS; |
---|
3478 | |
---|
3479 | /* |
---|
3480 | * Check for SystemID and ExternalID |
---|
3481 | */ |
---|
3482 | URI = docbParseExternalID(ctxt, &ExternalID); |
---|
3483 | SKIP_BLANKS; |
---|
3484 | |
---|
3485 | /* |
---|
3486 | * Create or update the document accordingly to the DOCTYPE |
---|
3487 | * But use the predefined PUBLIC and SYSTEM ID of DocBook XML |
---|
3488 | */ |
---|
3489 | if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && |
---|
3490 | (!ctxt->disableSAX)) |
---|
3491 | ctxt->sax->internalSubset(ctxt->userData, name, |
---|
3492 | XML_DOCBOOK_XML_PUBLIC, |
---|
3493 | XML_DOCBOOK_XML_SYSTEM); |
---|
3494 | |
---|
3495 | if (RAW != '>') { |
---|
3496 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3497 | ctxt->sax->error(ctxt->userData, |
---|
3498 | "docbParseDocTypeDecl : internal subset not handled\n"); |
---|
3499 | } else { |
---|
3500 | NEXT; |
---|
3501 | } |
---|
3502 | |
---|
3503 | /* |
---|
3504 | * Cleanup, since we don't use all those identifiers |
---|
3505 | */ |
---|
3506 | if (URI != NULL) xmlFree(URI); |
---|
3507 | if (ExternalID != NULL) xmlFree(ExternalID); |
---|
3508 | if (name != NULL) xmlFree(name); |
---|
3509 | } |
---|
3510 | |
---|
3511 | /** |
---|
3512 | * docbParseAttribute: |
---|
3513 | * @ctxt: an SGML parser context |
---|
3514 | * @value: a xmlChar ** used to store the value of the attribute |
---|
3515 | * |
---|
3516 | * parse an attribute |
---|
3517 | * |
---|
3518 | * [41] Attribute ::= Name Eq AttValue |
---|
3519 | * |
---|
3520 | * [25] Eq ::= S? '=' S? |
---|
3521 | * |
---|
3522 | * With namespace: |
---|
3523 | * |
---|
3524 | * [NS 11] Attribute ::= QName Eq AttValue |
---|
3525 | * |
---|
3526 | * Also the case QName == xmlns:??? is handled independently as a namespace |
---|
3527 | * definition. |
---|
3528 | * |
---|
3529 | * Returns the attribute name, and the value in *value. |
---|
3530 | */ |
---|
3531 | |
---|
3532 | static xmlChar * |
---|
3533 | docbParseAttribute(docbParserCtxtPtr ctxt, xmlChar **value) { |
---|
3534 | xmlChar *name, *val = NULL; |
---|
3535 | |
---|
3536 | *value = NULL; |
---|
3537 | name = docbParseName(ctxt); |
---|
3538 | if (name == NULL) { |
---|
3539 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3540 | ctxt->sax->error(ctxt->userData, "error parsing attribute name\n"); |
---|
3541 | ctxt->wellFormed = 0; |
---|
3542 | return(NULL); |
---|
3543 | } |
---|
3544 | |
---|
3545 | /* |
---|
3546 | * read the value |
---|
3547 | */ |
---|
3548 | SKIP_BLANKS; |
---|
3549 | if (CUR == '=') { |
---|
3550 | NEXT; |
---|
3551 | SKIP_BLANKS; |
---|
3552 | val = docbParseAttValue(ctxt); |
---|
3553 | /****** |
---|
3554 | } else { |
---|
3555 | * TODO : some attribute must have values, some may not |
---|
3556 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3557 | ctxt->sax->warning(ctxt->userData, |
---|
3558 | "No value for attribute %s\n", name); */ |
---|
3559 | } |
---|
3560 | |
---|
3561 | *value = val; |
---|
3562 | return(name); |
---|
3563 | } |
---|
3564 | |
---|
3565 | /** |
---|
3566 | * docbCheckEncoding: |
---|
3567 | * @ctxt: an SGML parser context |
---|
3568 | * @attvalue: the attribute value |
---|
3569 | * |
---|
3570 | * Checks an http-equiv attribute from a Meta tag to detect |
---|
3571 | * the encoding |
---|
3572 | * If a new encoding is detected the parser is switched to decode |
---|
3573 | * it and pass UTF8 |
---|
3574 | */ |
---|
3575 | static void |
---|
3576 | docbCheckEncoding(docbParserCtxtPtr ctxt, const xmlChar *attvalue) { |
---|
3577 | const xmlChar *encoding; |
---|
3578 | |
---|
3579 | if ((ctxt == NULL) || (attvalue == NULL)) |
---|
3580 | return; |
---|
3581 | |
---|
3582 | encoding = xmlStrstr(attvalue, BAD_CAST"charset="); |
---|
3583 | if (encoding == NULL) |
---|
3584 | encoding = xmlStrstr(attvalue, BAD_CAST"Charset="); |
---|
3585 | if (encoding == NULL) |
---|
3586 | encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET="); |
---|
3587 | if (encoding != NULL) { |
---|
3588 | encoding += 8; |
---|
3589 | } else { |
---|
3590 | encoding = xmlStrstr(attvalue, BAD_CAST"charset ="); |
---|
3591 | if (encoding == NULL) |
---|
3592 | encoding = xmlStrstr(attvalue, BAD_CAST"Charset ="); |
---|
3593 | if (encoding == NULL) |
---|
3594 | encoding = xmlStrstr(attvalue, BAD_CAST"CHARSET ="); |
---|
3595 | if (encoding != NULL) |
---|
3596 | encoding += 9; |
---|
3597 | } |
---|
3598 | /* |
---|
3599 | * Restricted from 2.3.5 */ |
---|
3600 | if (encoding != NULL) { |
---|
3601 | xmlCharEncoding enc; |
---|
3602 | |
---|
3603 | if (ctxt->input->encoding != NULL) |
---|
3604 | xmlFree((xmlChar *) ctxt->input->encoding); |
---|
3605 | ctxt->input->encoding = encoding; |
---|
3606 | |
---|
3607 | enc = xmlParseCharEncoding((const char *) encoding); |
---|
3608 | if (enc == XML_CHAR_ENCODING_8859_1) { |
---|
3609 | ctxt->charset = XML_CHAR_ENCODING_8859_1; |
---|
3610 | } else if (enc != XML_CHAR_ENCODING_UTF8) { |
---|
3611 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3612 | ctxt->sax->error(ctxt->userData, |
---|
3613 | "Unsupported encoding %s\n", encoding); |
---|
3614 | /* xmlFree(encoding); */ |
---|
3615 | ctxt->wellFormed = 0; |
---|
3616 | ctxt->disableSAX = 1; |
---|
3617 | ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; |
---|
3618 | } |
---|
3619 | } |
---|
3620 | } |
---|
3621 | |
---|
3622 | /** |
---|
3623 | * docbCheckMeta: |
---|
3624 | * @ctxt: an SGML parser context |
---|
3625 | * @atts: the attributes values |
---|
3626 | * |
---|
3627 | * Checks an attributes from a Meta tag |
---|
3628 | */ |
---|
3629 | static void |
---|
3630 | docbCheckMeta(docbParserCtxtPtr ctxt, const xmlChar **atts) { |
---|
3631 | int i; |
---|
3632 | const xmlChar *att, *value; |
---|
3633 | int http = 0; |
---|
3634 | const xmlChar *content = NULL; |
---|
3635 | |
---|
3636 | if ((ctxt == NULL) || (atts == NULL)) |
---|
3637 | return; |
---|
3638 | |
---|
3639 | i = 0; |
---|
3640 | att = atts[i++]; |
---|
3641 | while (att != NULL) { |
---|
3642 | value = atts[i++]; |
---|
3643 | if ((value != NULL) && |
---|
3644 | ((xmlStrEqual(att, BAD_CAST"http-equiv")) || |
---|
3645 | (xmlStrEqual(att, BAD_CAST"Http-Equiv")) || |
---|
3646 | (xmlStrEqual(att, BAD_CAST"HTTP-EQUIV"))) && |
---|
3647 | ((xmlStrEqual(value, BAD_CAST"Content-Type")) || |
---|
3648 | (xmlStrEqual(value, BAD_CAST"content-type")) || |
---|
3649 | (xmlStrEqual(value, BAD_CAST"CONTENT-TYPE")))) |
---|
3650 | http = 1; |
---|
3651 | else if ((value != NULL) && |
---|
3652 | ((xmlStrEqual(att, BAD_CAST"content")) || |
---|
3653 | (xmlStrEqual(att, BAD_CAST"Content")) || |
---|
3654 | (xmlStrEqual(att, BAD_CAST"CONTENT")))) |
---|
3655 | content = value; |
---|
3656 | att = atts[i++]; |
---|
3657 | } |
---|
3658 | if ((http) && (content != NULL)) |
---|
3659 | docbCheckEncoding(ctxt, content); |
---|
3660 | |
---|
3661 | } |
---|
3662 | |
---|
3663 | /** |
---|
3664 | * docbParseStartTag: |
---|
3665 | * @ctxt: an SGML parser context |
---|
3666 | * |
---|
3667 | * parse a start of tag either for rule element or |
---|
3668 | * EmptyElement. In both case we don't parse the tag closing chars. |
---|
3669 | * |
---|
3670 | * [40] STag ::= '<' Name (S Attribute)* S? '>' |
---|
3671 | * |
---|
3672 | * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' |
---|
3673 | * |
---|
3674 | * With namespace: |
---|
3675 | * |
---|
3676 | * [NS 8] STag ::= '<' QName (S Attribute)* S? '>' |
---|
3677 | * |
---|
3678 | * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' |
---|
3679 | * |
---|
3680 | */ |
---|
3681 | |
---|
3682 | static void |
---|
3683 | docbParseStartTag(docbParserCtxtPtr ctxt) { |
---|
3684 | xmlChar *name; |
---|
3685 | xmlChar *attname; |
---|
3686 | xmlChar *attvalue; |
---|
3687 | const xmlChar **atts = NULL; |
---|
3688 | int nbatts = 0; |
---|
3689 | int maxatts = 0; |
---|
3690 | int meta = 0; |
---|
3691 | int i; |
---|
3692 | |
---|
3693 | if (CUR != '<') return; |
---|
3694 | NEXT; |
---|
3695 | |
---|
3696 | GROW; |
---|
3697 | name = docbParseSGMLName(ctxt); |
---|
3698 | if (name == NULL) { |
---|
3699 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3700 | ctxt->sax->error(ctxt->userData, |
---|
3701 | "docbParseStartTag: invalid element name\n"); |
---|
3702 | ctxt->wellFormed = 0; |
---|
3703 | return; |
---|
3704 | } |
---|
3705 | if (xmlStrEqual(name, BAD_CAST"meta")) |
---|
3706 | meta = 1; |
---|
3707 | |
---|
3708 | /* |
---|
3709 | * Check for auto-closure of SGML elements. |
---|
3710 | */ |
---|
3711 | docbAutoClose(ctxt, name); |
---|
3712 | |
---|
3713 | /* |
---|
3714 | * Now parse the attributes, it ends up with the ending |
---|
3715 | * |
---|
3716 | * (S Attribute)* S? |
---|
3717 | */ |
---|
3718 | SKIP_BLANKS; |
---|
3719 | while ((IS_CHAR(CUR)) && |
---|
3720 | (CUR != '>') && |
---|
3721 | ((CUR != '/') || (NXT(1) != '>'))) { |
---|
3722 | long cons = ctxt->nbChars; |
---|
3723 | |
---|
3724 | GROW; |
---|
3725 | attname = docbParseAttribute(ctxt, &attvalue); |
---|
3726 | if (attname != NULL) { |
---|
3727 | |
---|
3728 | /* |
---|
3729 | * Well formedness requires at most one declaration of an attribute |
---|
3730 | */ |
---|
3731 | for (i = 0; i < nbatts;i += 2) { |
---|
3732 | if (xmlStrEqual(atts[i], attname)) { |
---|
3733 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3734 | ctxt->sax->error(ctxt->userData, |
---|
3735 | "Attribute %s redefined\n", |
---|
3736 | attname); |
---|
3737 | ctxt->wellFormed = 0; |
---|
3738 | xmlFree(attname); |
---|
3739 | if (attvalue != NULL) |
---|
3740 | xmlFree(attvalue); |
---|
3741 | goto failed; |
---|
3742 | } |
---|
3743 | } |
---|
3744 | |
---|
3745 | /* |
---|
3746 | * Add the pair to atts |
---|
3747 | */ |
---|
3748 | if (atts == NULL) { |
---|
3749 | maxatts = 10; |
---|
3750 | atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *)); |
---|
3751 | if (atts == NULL) { |
---|
3752 | xmlGenericError(xmlGenericErrorContext, |
---|
3753 | "malloc of %ld byte failed\n", |
---|
3754 | maxatts * (long)sizeof(xmlChar *)); |
---|
3755 | if (name != NULL) xmlFree(name); |
---|
3756 | return; |
---|
3757 | } |
---|
3758 | } else if (nbatts + 4 > maxatts) { |
---|
3759 | maxatts *= 2; |
---|
3760 | atts = (const xmlChar **) xmlRealloc((void *)atts, maxatts * sizeof(xmlChar *)); |
---|
3761 | if (atts == NULL) { |
---|
3762 | xmlGenericError(xmlGenericErrorContext, |
---|
3763 | "realloc of %ld byte failed\n", |
---|
3764 | maxatts * (long)sizeof(xmlChar *)); |
---|
3765 | if (name != NULL) xmlFree(name); |
---|
3766 | return; |
---|
3767 | } |
---|
3768 | } |
---|
3769 | atts[nbatts++] = attname; |
---|
3770 | atts[nbatts++] = attvalue; |
---|
3771 | atts[nbatts] = NULL; |
---|
3772 | atts[nbatts + 1] = NULL; |
---|
3773 | } |
---|
3774 | |
---|
3775 | failed: |
---|
3776 | SKIP_BLANKS; |
---|
3777 | if (cons == ctxt->nbChars) { |
---|
3778 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3779 | ctxt->sax->error(ctxt->userData, |
---|
3780 | "docbParseStartTag: problem parsing attributes\n"); |
---|
3781 | ctxt->wellFormed = 0; |
---|
3782 | break; |
---|
3783 | } |
---|
3784 | } |
---|
3785 | |
---|
3786 | /* |
---|
3787 | * Handle specific association to the META tag |
---|
3788 | */ |
---|
3789 | if (meta) |
---|
3790 | docbCheckMeta(ctxt, atts); |
---|
3791 | |
---|
3792 | /* |
---|
3793 | * SAX: Start of Element ! |
---|
3794 | */ |
---|
3795 | docbnamePush(ctxt, xmlStrdup(name)); |
---|
3796 | #ifdef DEBUG |
---|
3797 | xmlGenericError(xmlGenericErrorContext,"Start of element %s: pushed %s\n", name, ctxt->name); |
---|
3798 | #endif |
---|
3799 | if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) |
---|
3800 | ctxt->sax->startElement(ctxt->userData, name, atts); |
---|
3801 | |
---|
3802 | if (atts != NULL) { |
---|
3803 | for (i = 0;i < nbatts;i++) { |
---|
3804 | if (atts[i] != NULL) |
---|
3805 | xmlFree((xmlChar *) atts[i]); |
---|
3806 | } |
---|
3807 | xmlFree((void *) atts); |
---|
3808 | } |
---|
3809 | if (name != NULL) xmlFree(name); |
---|
3810 | } |
---|
3811 | |
---|
3812 | /** |
---|
3813 | * docbParseEndTag: |
---|
3814 | * @ctxt: an SGML parser context |
---|
3815 | * |
---|
3816 | * parse an end of tag |
---|
3817 | * |
---|
3818 | * [42] ETag ::= '</' Name S? '>' |
---|
3819 | * |
---|
3820 | * With namespace |
---|
3821 | * |
---|
3822 | * [NS 9] ETag ::= '</' QName S? '>' |
---|
3823 | */ |
---|
3824 | |
---|
3825 | static void |
---|
3826 | docbParseEndTag(docbParserCtxtPtr ctxt) { |
---|
3827 | xmlChar *name; |
---|
3828 | xmlChar *oldname; |
---|
3829 | int i; |
---|
3830 | |
---|
3831 | if ((CUR != '<') || (NXT(1) != '/')) { |
---|
3832 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3833 | ctxt->sax->error(ctxt->userData, "docbParseEndTag: '</' not found\n"); |
---|
3834 | ctxt->wellFormed = 0; |
---|
3835 | return; |
---|
3836 | } |
---|
3837 | SKIP(2); |
---|
3838 | |
---|
3839 | name = docbParseSGMLName(ctxt); |
---|
3840 | if (name == NULL) { |
---|
3841 | if (CUR == '>') { |
---|
3842 | NEXT; |
---|
3843 | oldname = docbnamePop(ctxt); |
---|
3844 | if (oldname != NULL) { |
---|
3845 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
3846 | ctxt->sax->endElement(ctxt->userData, name); |
---|
3847 | #ifdef DEBUG |
---|
3848 | xmlGenericError(xmlGenericErrorContext,"End of tag </>: popping out %s\n", oldname); |
---|
3849 | #endif |
---|
3850 | xmlFree(oldname); |
---|
3851 | #ifdef DEBUG |
---|
3852 | } else { |
---|
3853 | xmlGenericError(xmlGenericErrorContext,"End of tag </>: stack empty !!!\n"); |
---|
3854 | #endif |
---|
3855 | } |
---|
3856 | return; |
---|
3857 | } else |
---|
3858 | return; |
---|
3859 | } |
---|
3860 | |
---|
3861 | /* |
---|
3862 | * We should definitely be at the ending "S? '>'" part |
---|
3863 | */ |
---|
3864 | SKIP_BLANKS; |
---|
3865 | if ((!IS_CHAR(CUR)) || (CUR != '>')) { |
---|
3866 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3867 | ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n"); |
---|
3868 | ctxt->wellFormed = 0; |
---|
3869 | } else |
---|
3870 | NEXT; |
---|
3871 | |
---|
3872 | /* |
---|
3873 | * If the name read is not one of the element in the parsing stack |
---|
3874 | * then return, it's just an error. |
---|
3875 | */ |
---|
3876 | for (i = (ctxt->nameNr - 1);i >= 0;i--) { |
---|
3877 | if (xmlStrEqual(name, ctxt->nameTab[i])) break; |
---|
3878 | } |
---|
3879 | if (i < 0) { |
---|
3880 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3881 | ctxt->sax->error(ctxt->userData, |
---|
3882 | "Unexpected end tag : %s\n", name); |
---|
3883 | xmlFree(name); |
---|
3884 | ctxt->wellFormed = 0; |
---|
3885 | return; |
---|
3886 | } |
---|
3887 | |
---|
3888 | |
---|
3889 | /* |
---|
3890 | * Check for auto-closure of SGML elements. |
---|
3891 | */ |
---|
3892 | |
---|
3893 | docbAutoCloseOnClose(ctxt, name); |
---|
3894 | |
---|
3895 | /* |
---|
3896 | * Well formedness constraints, opening and closing must match. |
---|
3897 | * With the exception that the autoclose may have popped stuff out |
---|
3898 | * of the stack. |
---|
3899 | */ |
---|
3900 | if (((name[0] != '/') || (name[1] != 0)) && |
---|
3901 | (!xmlStrEqual(name, ctxt->name))) { |
---|
3902 | #ifdef DEBUG |
---|
3903 | xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name); |
---|
3904 | #endif |
---|
3905 | if ((ctxt->name != NULL) && |
---|
3906 | (!xmlStrEqual(ctxt->name, name))) { |
---|
3907 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
3908 | ctxt->sax->error(ctxt->userData, |
---|
3909 | "Opening and ending tag mismatch: %s and %s\n", |
---|
3910 | name, ctxt->name); |
---|
3911 | ctxt->wellFormed = 0; |
---|
3912 | } |
---|
3913 | } |
---|
3914 | |
---|
3915 | /* |
---|
3916 | * SAX: End of Tag |
---|
3917 | */ |
---|
3918 | oldname = ctxt->name; |
---|
3919 | if (((name[0] == '/') && (name[1] == 0)) || |
---|
3920 | ((oldname != NULL) && (xmlStrEqual(oldname, name)))) { |
---|
3921 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
3922 | ctxt->sax->endElement(ctxt->userData, name); |
---|
3923 | oldname = docbnamePop(ctxt); |
---|
3924 | if (oldname != NULL) { |
---|
3925 | #ifdef DEBUG |
---|
3926 | xmlGenericError(xmlGenericErrorContext,"End of tag %s: popping out %s\n", name, oldname); |
---|
3927 | #endif |
---|
3928 | xmlFree(oldname); |
---|
3929 | #ifdef DEBUG |
---|
3930 | } else { |
---|
3931 | xmlGenericError(xmlGenericErrorContext,"End of tag %s: stack empty !!!\n", name); |
---|
3932 | #endif |
---|
3933 | } |
---|
3934 | } |
---|
3935 | |
---|
3936 | if (name != NULL) |
---|
3937 | xmlFree(name); |
---|
3938 | |
---|
3939 | return; |
---|
3940 | } |
---|
3941 | |
---|
3942 | |
---|
3943 | /** |
---|
3944 | * docbParseReference: |
---|
3945 | * @ctxt: an SGML parser context |
---|
3946 | * |
---|
3947 | * parse and handle entity references in content, |
---|
3948 | * this will end-up in a call to character() since this is either a |
---|
3949 | * CharRef, or a predefined entity. |
---|
3950 | */ |
---|
3951 | static void |
---|
3952 | docbParseReference(docbParserCtxtPtr ctxt) { |
---|
3953 | docbEntityDescPtr ent; |
---|
3954 | xmlEntityPtr xent; |
---|
3955 | xmlChar out[6]; |
---|
3956 | xmlChar *name; |
---|
3957 | if (CUR != '&') return; |
---|
3958 | |
---|
3959 | if (NXT(1) == '#') { |
---|
3960 | unsigned int c; |
---|
3961 | int bits, i = 0; |
---|
3962 | |
---|
3963 | c = docbParseCharRef(ctxt); |
---|
3964 | if (c < 0x80) { out[i++]= c; bits= -6; } |
---|
3965 | else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
---|
3966 | else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
---|
3967 | else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } |
---|
3968 | |
---|
3969 | for ( ; bits >= 0; bits-= 6) { |
---|
3970 | out[i++]= ((c >> bits) & 0x3F) | 0x80; |
---|
3971 | } |
---|
3972 | out[i] = 0; |
---|
3973 | |
---|
3974 | if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
---|
3975 | ctxt->sax->characters(ctxt->userData, out, i); |
---|
3976 | } else { |
---|
3977 | /* |
---|
3978 | * Lookup the entity in the table. |
---|
3979 | */ |
---|
3980 | xent = docbParseEntityRef(ctxt, &name); |
---|
3981 | if (xent != NULL) { |
---|
3982 | if (((ctxt->replaceEntities) || (ctxt->loadsubset)) && |
---|
3983 | ((xent->children == NULL) && |
---|
3984 | (xent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))) { |
---|
3985 | /* |
---|
3986 | * we really need to fetch and parse the external entity |
---|
3987 | */ |
---|
3988 | int parse; |
---|
3989 | xmlNodePtr children = NULL; |
---|
3990 | |
---|
3991 | parse = docbParseCtxtExternalEntity(ctxt, |
---|
3992 | xent->SystemID, xent->ExternalID, &children); |
---|
3993 | xmlAddChildList((xmlNodePtr) xent, children); |
---|
3994 | } |
---|
3995 | if (ctxt->replaceEntities) { |
---|
3996 | if ((ctxt->node != NULL) && (xent->children != NULL)) { |
---|
3997 | /* |
---|
3998 | * Seems we are generating the DOM content, do |
---|
3999 | * a simple tree copy |
---|
4000 | */ |
---|
4001 | xmlNodePtr new; |
---|
4002 | new = xmlCopyNodeList(xent->children); |
---|
4003 | |
---|
4004 | xmlAddChildList(ctxt->node, new); |
---|
4005 | /* |
---|
4006 | * This is to avoid a nasty side effect, see |
---|
4007 | * characters() in SAX.c |
---|
4008 | */ |
---|
4009 | ctxt->nodemem = 0; |
---|
4010 | ctxt->nodelen = 0; |
---|
4011 | } |
---|
4012 | } else { |
---|
4013 | if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && |
---|
4014 | (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { |
---|
4015 | /* |
---|
4016 | * Create a node. |
---|
4017 | */ |
---|
4018 | ctxt->sax->reference(ctxt->userData, xent->name); |
---|
4019 | } |
---|
4020 | } |
---|
4021 | } else if (name != NULL) { |
---|
4022 | ent = docbEntityLookup(name); |
---|
4023 | if ((ent == NULL) || (ent->value <= 0)) { |
---|
4024 | if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) { |
---|
4025 | ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); |
---|
4026 | ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name)); |
---|
4027 | /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */ |
---|
4028 | } |
---|
4029 | } else { |
---|
4030 | unsigned int c; |
---|
4031 | int bits, i = 0; |
---|
4032 | |
---|
4033 | c = ent->value; |
---|
4034 | if (c < 0x80) |
---|
4035 | { out[i++]= c; bits= -6; } |
---|
4036 | else if (c < 0x800) |
---|
4037 | { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
---|
4038 | else if (c < 0x10000) |
---|
4039 | { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; } |
---|
4040 | else |
---|
4041 | { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; } |
---|
4042 | |
---|
4043 | for ( ; bits >= 0; bits-= 6) { |
---|
4044 | out[i++]= ((c >> bits) & 0x3F) | 0x80; |
---|
4045 | } |
---|
4046 | out[i] = 0; |
---|
4047 | |
---|
4048 | if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
---|
4049 | ctxt->sax->characters(ctxt->userData, out, i); |
---|
4050 | } |
---|
4051 | } else { |
---|
4052 | if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
---|
4053 | ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1); |
---|
4054 | return; |
---|
4055 | } |
---|
4056 | if (name != NULL) |
---|
4057 | xmlFree(name); |
---|
4058 | } |
---|
4059 | } |
---|
4060 | |
---|
4061 | /** |
---|
4062 | * docbParseContent: |
---|
4063 | * @ctxt: an SGML parser context |
---|
4064 | * @name: the node name |
---|
4065 | * |
---|
4066 | * Parse a content: comment, sub-element, reference or text. |
---|
4067 | * |
---|
4068 | */ |
---|
4069 | static void |
---|
4070 | docbParseContent(docbParserCtxtPtr ctxt) |
---|
4071 | { |
---|
4072 | xmlChar *currentNode; |
---|
4073 | int depth; |
---|
4074 | |
---|
4075 | currentNode = xmlStrdup(ctxt->name); |
---|
4076 | depth = ctxt->nameNr; |
---|
4077 | while (1) { |
---|
4078 | long cons = ctxt->nbChars; |
---|
4079 | |
---|
4080 | GROW; |
---|
4081 | /* |
---|
4082 | * Our tag or one of it's parent or children is ending. |
---|
4083 | */ |
---|
4084 | if ((CUR == '<') && (NXT(1) == '/')) { |
---|
4085 | docbParseEndTag(ctxt); |
---|
4086 | if (currentNode != NULL) |
---|
4087 | xmlFree(currentNode); |
---|
4088 | return; |
---|
4089 | } |
---|
4090 | |
---|
4091 | /* |
---|
4092 | * Has this node been popped out during parsing of |
---|
4093 | * the next element |
---|
4094 | */ |
---|
4095 | if ((!xmlStrEqual(currentNode, ctxt->name)) && |
---|
4096 | (depth >= ctxt->nameNr)) { |
---|
4097 | if (currentNode != NULL) |
---|
4098 | xmlFree(currentNode); |
---|
4099 | return; |
---|
4100 | } |
---|
4101 | |
---|
4102 | /* |
---|
4103 | * Sometimes DOCTYPE arrives in the middle of the document |
---|
4104 | */ |
---|
4105 | if ((CUR == '<') && (NXT(1) == '!') && |
---|
4106 | (UPP(2) == 'D') && (UPP(3) == 'O') && |
---|
4107 | (UPP(4) == 'C') && (UPP(5) == 'T') && |
---|
4108 | (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { |
---|
4109 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4110 | ctxt->sax->error(ctxt->userData, |
---|
4111 | "Misplaced DOCTYPE declaration\n"); |
---|
4112 | ctxt->wellFormed = 0; |
---|
4113 | docbParseDocTypeDecl(ctxt); |
---|
4114 | } |
---|
4115 | |
---|
4116 | /* |
---|
4117 | * First case : a comment |
---|
4118 | */ |
---|
4119 | if ((CUR == '<') && (NXT(1) == '!') && |
---|
4120 | (NXT(2) == '-') && (NXT(3) == '-')) { |
---|
4121 | docbParseComment(ctxt); |
---|
4122 | } |
---|
4123 | |
---|
4124 | /* |
---|
4125 | * Second case : a PI |
---|
4126 | */ |
---|
4127 | else if ((RAW == '<') && (NXT(1) == '?')) { |
---|
4128 | docbParsePI(ctxt); |
---|
4129 | } |
---|
4130 | |
---|
4131 | /* |
---|
4132 | * Third case : a sub-element. |
---|
4133 | */ |
---|
4134 | else if (CUR == '<') { |
---|
4135 | docbParseElement(ctxt); |
---|
4136 | } |
---|
4137 | |
---|
4138 | /* |
---|
4139 | * Fourth case : a reference. If if has not been resolved, |
---|
4140 | * parsing returns it's Name, create the node |
---|
4141 | */ |
---|
4142 | else if (CUR == '&') { |
---|
4143 | docbParseReference(ctxt); |
---|
4144 | } |
---|
4145 | |
---|
4146 | /* |
---|
4147 | * Fifth : end of the resource |
---|
4148 | */ |
---|
4149 | else if (CUR == 0) { |
---|
4150 | docbAutoClose(ctxt, NULL); |
---|
4151 | if (ctxt->nameNr == 0) |
---|
4152 | break; |
---|
4153 | } |
---|
4154 | |
---|
4155 | /* |
---|
4156 | * Last case, text. Note that References are handled directly. |
---|
4157 | */ |
---|
4158 | else { |
---|
4159 | docbParseCharData(ctxt); |
---|
4160 | } |
---|
4161 | |
---|
4162 | if (cons == ctxt->nbChars) { |
---|
4163 | if (ctxt->node != NULL) { |
---|
4164 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4165 | ctxt->sax->error(ctxt->userData, |
---|
4166 | "detected an error in element content\n"); |
---|
4167 | ctxt->wellFormed = 0; |
---|
4168 | } |
---|
4169 | break; |
---|
4170 | } |
---|
4171 | |
---|
4172 | GROW; |
---|
4173 | } |
---|
4174 | if (currentNode != NULL) |
---|
4175 | xmlFree(currentNode); |
---|
4176 | } |
---|
4177 | |
---|
4178 | /** |
---|
4179 | * docbParseElement: |
---|
4180 | * @ctxt: an SGML parser context |
---|
4181 | * |
---|
4182 | * parse an SGML element, this is highly recursive |
---|
4183 | * |
---|
4184 | * [39] element ::= EmptyElemTag | STag content ETag |
---|
4185 | * |
---|
4186 | * [41] Attribute ::= Name Eq AttValue |
---|
4187 | */ |
---|
4188 | |
---|
4189 | static void |
---|
4190 | docbParseElement(docbParserCtxtPtr ctxt) { |
---|
4191 | xmlChar *name; |
---|
4192 | xmlChar *currentNode = NULL; |
---|
4193 | docbElemDescPtr info; |
---|
4194 | docbParserNodeInfo node_info; |
---|
4195 | xmlChar *oldname; |
---|
4196 | int depth = ctxt->nameNr; |
---|
4197 | |
---|
4198 | /* Capture start position */ |
---|
4199 | if (ctxt->record_info) { |
---|
4200 | node_info.begin_pos = ctxt->input->consumed + |
---|
4201 | (CUR_PTR - ctxt->input->base); |
---|
4202 | node_info.begin_line = ctxt->input->line; |
---|
4203 | } |
---|
4204 | |
---|
4205 | oldname = xmlStrdup(ctxt->name); |
---|
4206 | docbParseStartTag(ctxt); |
---|
4207 | name = ctxt->name; |
---|
4208 | #ifdef DEBUG |
---|
4209 | if (oldname == NULL) |
---|
4210 | xmlGenericError(xmlGenericErrorContext, |
---|
4211 | "Start of element %s\n", name); |
---|
4212 | else if (name == NULL) |
---|
4213 | xmlGenericError(xmlGenericErrorContext, |
---|
4214 | "Start of element failed, was %s\n", oldname); |
---|
4215 | else |
---|
4216 | xmlGenericError(xmlGenericErrorContext, |
---|
4217 | "Start of element %s, was %s\n", name, oldname); |
---|
4218 | #endif |
---|
4219 | if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) || |
---|
4220 | (name == NULL)) { |
---|
4221 | if (CUR == '>') |
---|
4222 | NEXT; |
---|
4223 | if (oldname != NULL) |
---|
4224 | xmlFree(oldname); |
---|
4225 | return; |
---|
4226 | } |
---|
4227 | if (oldname != NULL) |
---|
4228 | xmlFree(oldname); |
---|
4229 | |
---|
4230 | /* |
---|
4231 | * Lookup the info for that element. |
---|
4232 | */ |
---|
4233 | info = docbTagLookup(name); |
---|
4234 | if (info == NULL) { |
---|
4235 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4236 | ctxt->sax->error(ctxt->userData, "Tag %s unknown\n", |
---|
4237 | name); |
---|
4238 | ctxt->wellFormed = 0; |
---|
4239 | } else if (info->depr) { |
---|
4240 | /*************************** |
---|
4241 | if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) |
---|
4242 | ctxt->sax->warning(ctxt->userData, "Tag %s is deprecated\n", |
---|
4243 | name); |
---|
4244 | ***************************/ |
---|
4245 | } |
---|
4246 | |
---|
4247 | /* |
---|
4248 | * Check for an Empty Element labeled the XML/SGML way |
---|
4249 | */ |
---|
4250 | if ((CUR == '/') && (NXT(1) == '>')) { |
---|
4251 | SKIP(2); |
---|
4252 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
4253 | ctxt->sax->endElement(ctxt->userData, name); |
---|
4254 | oldname = docbnamePop(ctxt); |
---|
4255 | #ifdef DEBUG |
---|
4256 | xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", oldname); |
---|
4257 | #endif |
---|
4258 | if (oldname != NULL) |
---|
4259 | xmlFree(oldname); |
---|
4260 | return; |
---|
4261 | } |
---|
4262 | |
---|
4263 | if (CUR == '>') { |
---|
4264 | NEXT; |
---|
4265 | } else { |
---|
4266 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4267 | ctxt->sax->error(ctxt->userData, |
---|
4268 | "Couldn't find end of Start Tag %s\n", |
---|
4269 | name); |
---|
4270 | ctxt->wellFormed = 0; |
---|
4271 | |
---|
4272 | /* |
---|
4273 | * end of parsing of this node. |
---|
4274 | */ |
---|
4275 | if (xmlStrEqual(name, ctxt->name)) { |
---|
4276 | nodePop(ctxt); |
---|
4277 | oldname = docbnamePop(ctxt); |
---|
4278 | #ifdef DEBUG |
---|
4279 | xmlGenericError(xmlGenericErrorContext,"End of start tag problem: popping out %s\n", oldname); |
---|
4280 | #endif |
---|
4281 | if (oldname != NULL) |
---|
4282 | xmlFree(oldname); |
---|
4283 | } |
---|
4284 | |
---|
4285 | /* |
---|
4286 | * Capture end position and add node |
---|
4287 | */ |
---|
4288 | if ( currentNode != NULL && ctxt->record_info ) { |
---|
4289 | node_info.end_pos = ctxt->input->consumed + |
---|
4290 | (CUR_PTR - ctxt->input->base); |
---|
4291 | node_info.end_line = ctxt->input->line; |
---|
4292 | node_info.node = ctxt->node; |
---|
4293 | xmlParserAddNodeInfo(ctxt, &node_info); |
---|
4294 | } |
---|
4295 | return; |
---|
4296 | } |
---|
4297 | |
---|
4298 | /* |
---|
4299 | * Check for an Empty Element from DTD definition |
---|
4300 | */ |
---|
4301 | if ((info != NULL) && (info->empty)) { |
---|
4302 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
4303 | ctxt->sax->endElement(ctxt->userData, name); |
---|
4304 | oldname = docbnamePop(ctxt); |
---|
4305 | #ifdef DEBUG |
---|
4306 | xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname); |
---|
4307 | #endif |
---|
4308 | if (oldname != NULL) |
---|
4309 | xmlFree(oldname); |
---|
4310 | return; |
---|
4311 | } |
---|
4312 | |
---|
4313 | /* |
---|
4314 | * Parse the content of the element: |
---|
4315 | */ |
---|
4316 | currentNode = xmlStrdup(ctxt->name); |
---|
4317 | depth = ctxt->nameNr; |
---|
4318 | while (IS_CHAR(CUR)) { |
---|
4319 | docbParseContent(ctxt); |
---|
4320 | if (ctxt->nameNr < depth) break; |
---|
4321 | } |
---|
4322 | |
---|
4323 | if (!IS_CHAR(CUR)) { |
---|
4324 | /************ |
---|
4325 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4326 | ctxt->sax->error(ctxt->userData, |
---|
4327 | "Premature end of data in tag %s\n", currentNode); |
---|
4328 | ctxt->wellFormed = 0; |
---|
4329 | *************/ |
---|
4330 | |
---|
4331 | /* |
---|
4332 | * end of parsing of this node. |
---|
4333 | */ |
---|
4334 | nodePop(ctxt); |
---|
4335 | oldname = docbnamePop(ctxt); |
---|
4336 | #ifdef DEBUG |
---|
4337 | xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname); |
---|
4338 | #endif |
---|
4339 | if (oldname != NULL) |
---|
4340 | xmlFree(oldname); |
---|
4341 | if (currentNode != NULL) |
---|
4342 | xmlFree(currentNode); |
---|
4343 | return; |
---|
4344 | } |
---|
4345 | |
---|
4346 | /* |
---|
4347 | * Capture end position and add node |
---|
4348 | */ |
---|
4349 | if ( currentNode != NULL && ctxt->record_info ) { |
---|
4350 | node_info.end_pos = ctxt->input->consumed + |
---|
4351 | (CUR_PTR - ctxt->input->base); |
---|
4352 | node_info.end_line = ctxt->input->line; |
---|
4353 | node_info.node = ctxt->node; |
---|
4354 | xmlParserAddNodeInfo(ctxt, &node_info); |
---|
4355 | } |
---|
4356 | if (currentNode != NULL) |
---|
4357 | xmlFree(currentNode); |
---|
4358 | } |
---|
4359 | |
---|
4360 | /** |
---|
4361 | * docbParseEntityDecl: |
---|
4362 | * @ctxt: an SGML parser context |
---|
4363 | * |
---|
4364 | * parse <!ENTITY declarations |
---|
4365 | * |
---|
4366 | */ |
---|
4367 | |
---|
4368 | static void |
---|
4369 | docbParseEntityDecl(xmlParserCtxtPtr ctxt) { |
---|
4370 | xmlChar *name = NULL; |
---|
4371 | xmlChar *value = NULL; |
---|
4372 | xmlChar *URI = NULL, *literal = NULL; |
---|
4373 | xmlChar *ndata = NULL; |
---|
4374 | int isParameter = 0; |
---|
4375 | xmlChar *orig = NULL; |
---|
4376 | |
---|
4377 | GROW; |
---|
4378 | if ((RAW == '<') && (NXT(1) == '!') && |
---|
4379 | (UPP(2) == 'E') && (UPP(3) == 'N') && |
---|
4380 | (UPP(4) == 'T') && (UPP(5) == 'I') && |
---|
4381 | (UPP(6) == 'T') && (UPP(7) == 'Y')) { |
---|
4382 | xmlParserInputPtr input = ctxt->input; |
---|
4383 | ctxt->instate = XML_PARSER_ENTITY_DECL; |
---|
4384 | SHRINK; |
---|
4385 | SKIP(8); |
---|
4386 | if (!IS_BLANK(CUR)) { |
---|
4387 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
4388 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4389 | ctxt->sax->error(ctxt->userData, |
---|
4390 | "Space required after '<!ENTITY'\n"); |
---|
4391 | ctxt->wellFormed = 0; |
---|
4392 | ctxt->disableSAX = 1; |
---|
4393 | } |
---|
4394 | SKIP_BLANKS; |
---|
4395 | |
---|
4396 | if (RAW == '%') { |
---|
4397 | NEXT; |
---|
4398 | if (!IS_BLANK(CUR)) { |
---|
4399 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
4400 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4401 | ctxt->sax->error(ctxt->userData, |
---|
4402 | "Space required after '%'\n"); |
---|
4403 | ctxt->wellFormed = 0; |
---|
4404 | ctxt->disableSAX = 1; |
---|
4405 | } |
---|
4406 | SKIP_BLANKS; |
---|
4407 | isParameter = 1; |
---|
4408 | } |
---|
4409 | |
---|
4410 | name = xmlParseName(ctxt); |
---|
4411 | if (name == NULL) { |
---|
4412 | ctxt->errNo = XML_ERR_NAME_REQUIRED; |
---|
4413 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4414 | ctxt->sax->error(ctxt->userData, "sgmlarseEntityDecl: no name\n"); |
---|
4415 | ctxt->wellFormed = 0; |
---|
4416 | ctxt->disableSAX = 1; |
---|
4417 | return; |
---|
4418 | } |
---|
4419 | if (!IS_BLANK(CUR)) { |
---|
4420 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
4421 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4422 | ctxt->sax->error(ctxt->userData, |
---|
4423 | "Space required after the entity name\n"); |
---|
4424 | ctxt->wellFormed = 0; |
---|
4425 | ctxt->disableSAX = 1; |
---|
4426 | } |
---|
4427 | SKIP_BLANKS; |
---|
4428 | |
---|
4429 | /* |
---|
4430 | * handle the various case of definitions... |
---|
4431 | */ |
---|
4432 | if (isParameter) { |
---|
4433 | if ((RAW == '"') || (RAW == '\'')) { |
---|
4434 | value = xmlParseEntityValue(ctxt, &orig); |
---|
4435 | if (value) { |
---|
4436 | if ((ctxt->sax != NULL) && |
---|
4437 | (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) |
---|
4438 | ctxt->sax->entityDecl(ctxt->userData, name, |
---|
4439 | XML_INTERNAL_PARAMETER_ENTITY, |
---|
4440 | NULL, NULL, value); |
---|
4441 | } |
---|
4442 | } else { |
---|
4443 | URI = xmlParseExternalID(ctxt, &literal, 1); |
---|
4444 | if ((URI == NULL) && (literal == NULL)) { |
---|
4445 | ctxt->errNo = XML_ERR_VALUE_REQUIRED; |
---|
4446 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4447 | ctxt->sax->error(ctxt->userData, |
---|
4448 | "Entity value required\n"); |
---|
4449 | ctxt->wellFormed = 0; |
---|
4450 | ctxt->disableSAX = 1; |
---|
4451 | } |
---|
4452 | if (URI) { |
---|
4453 | xmlURIPtr uri; |
---|
4454 | |
---|
4455 | uri = xmlParseURI((const char *) URI); |
---|
4456 | if (uri == NULL) { |
---|
4457 | ctxt->errNo = XML_ERR_INVALID_URI; |
---|
4458 | if ((ctxt->sax != NULL) && |
---|
4459 | (!ctxt->disableSAX) && |
---|
4460 | (ctxt->sax->error != NULL)) |
---|
4461 | ctxt->sax->error(ctxt->userData, |
---|
4462 | "Invalid URI: %s\n", URI); |
---|
4463 | ctxt->wellFormed = 0; |
---|
4464 | } else { |
---|
4465 | if (uri->fragment != NULL) { |
---|
4466 | ctxt->errNo = XML_ERR_URI_FRAGMENT; |
---|
4467 | if ((ctxt->sax != NULL) && |
---|
4468 | (!ctxt->disableSAX) && |
---|
4469 | (ctxt->sax->error != NULL)) |
---|
4470 | ctxt->sax->error(ctxt->userData, |
---|
4471 | "Fragment not allowed: %s\n", URI); |
---|
4472 | ctxt->wellFormed = 0; |
---|
4473 | } else { |
---|
4474 | if ((ctxt->sax != NULL) && |
---|
4475 | (!ctxt->disableSAX) && |
---|
4476 | (ctxt->sax->entityDecl != NULL)) |
---|
4477 | ctxt->sax->entityDecl(ctxt->userData, name, |
---|
4478 | XML_EXTERNAL_PARAMETER_ENTITY, |
---|
4479 | literal, URI, NULL); |
---|
4480 | } |
---|
4481 | xmlFreeURI(uri); |
---|
4482 | } |
---|
4483 | } |
---|
4484 | } |
---|
4485 | } else { |
---|
4486 | if ((RAW == '"') || (RAW == '\'')) { |
---|
4487 | value = xmlParseEntityValue(ctxt, &orig); |
---|
4488 | if ((ctxt->sax != NULL) && |
---|
4489 | (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) |
---|
4490 | ctxt->sax->entityDecl(ctxt->userData, name, |
---|
4491 | XML_INTERNAL_GENERAL_ENTITY, |
---|
4492 | NULL, NULL, value); |
---|
4493 | } else { |
---|
4494 | URI = xmlParseExternalID(ctxt, &literal, 1); |
---|
4495 | if ((URI == NULL) && (literal == NULL)) { |
---|
4496 | ctxt->errNo = XML_ERR_VALUE_REQUIRED; |
---|
4497 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4498 | ctxt->sax->error(ctxt->userData, |
---|
4499 | "Entity value required\n"); |
---|
4500 | ctxt->wellFormed = 0; |
---|
4501 | ctxt->disableSAX = 1; |
---|
4502 | } |
---|
4503 | if (URI) { |
---|
4504 | xmlURIPtr uri; |
---|
4505 | |
---|
4506 | uri = xmlParseURI((const char *)URI); |
---|
4507 | if (uri == NULL) { |
---|
4508 | ctxt->errNo = XML_ERR_INVALID_URI; |
---|
4509 | if ((ctxt->sax != NULL) && |
---|
4510 | (!ctxt->disableSAX) && |
---|
4511 | (ctxt->sax->error != NULL)) |
---|
4512 | ctxt->sax->error(ctxt->userData, |
---|
4513 | "Invalid URI: %s\n", URI); |
---|
4514 | ctxt->wellFormed = 0; |
---|
4515 | } else { |
---|
4516 | if (uri->fragment != NULL) { |
---|
4517 | ctxt->errNo = XML_ERR_URI_FRAGMENT; |
---|
4518 | if ((ctxt->sax != NULL) && |
---|
4519 | (!ctxt->disableSAX) && |
---|
4520 | (ctxt->sax->error != NULL)) |
---|
4521 | ctxt->sax->error(ctxt->userData, |
---|
4522 | "Fragment not allowed: %s\n", URI); |
---|
4523 | ctxt->wellFormed = 0; |
---|
4524 | } |
---|
4525 | xmlFreeURI(uri); |
---|
4526 | } |
---|
4527 | } |
---|
4528 | if ((RAW != '>') && (!IS_BLANK(CUR))) { |
---|
4529 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
4530 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4531 | ctxt->sax->error(ctxt->userData, |
---|
4532 | "Space required before content model\n"); |
---|
4533 | ctxt->wellFormed = 0; |
---|
4534 | ctxt->disableSAX = 1; |
---|
4535 | } |
---|
4536 | SKIP_BLANKS; |
---|
4537 | |
---|
4538 | /* |
---|
4539 | * SGML specific: here we can get the content model |
---|
4540 | */ |
---|
4541 | if (RAW != '>') { |
---|
4542 | xmlChar *contmod; |
---|
4543 | |
---|
4544 | contmod = xmlParseName(ctxt); |
---|
4545 | |
---|
4546 | if (contmod == NULL) { |
---|
4547 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
4548 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4549 | ctxt->sax->error(ctxt->userData, |
---|
4550 | "Could not parse entity content model\n"); |
---|
4551 | ctxt->wellFormed = 0; |
---|
4552 | ctxt->disableSAX = 1; |
---|
4553 | } else { |
---|
4554 | if (xmlStrEqual(contmod, BAD_CAST"NDATA")) { |
---|
4555 | if (!IS_BLANK(CUR)) { |
---|
4556 | ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
---|
4557 | if ((ctxt->sax != NULL) && |
---|
4558 | (ctxt->sax->error != NULL)) |
---|
4559 | ctxt->sax->error(ctxt->userData, |
---|
4560 | "Space required after 'NDATA'\n"); |
---|
4561 | ctxt->wellFormed = 0; |
---|
4562 | ctxt->disableSAX = 1; |
---|
4563 | } |
---|
4564 | SKIP_BLANKS; |
---|
4565 | ndata = xmlParseName(ctxt); |
---|
4566 | if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && |
---|
4567 | (ctxt->sax->unparsedEntityDecl != NULL)) { |
---|
4568 | ctxt->sax->unparsedEntityDecl(ctxt->userData, |
---|
4569 | name, literal, URI, ndata); |
---|
4570 | } |
---|
4571 | } else if (xmlStrEqual(contmod, BAD_CAST"SUBDOC")) { |
---|
4572 | if ((ctxt->sax != NULL) && |
---|
4573 | (ctxt->sax->warning != NULL)) |
---|
4574 | ctxt->sax->warning(ctxt->userData, |
---|
4575 | "SUBDOC entities are not supported\n"); |
---|
4576 | SKIP_BLANKS; |
---|
4577 | ndata = xmlParseName(ctxt); |
---|
4578 | if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && |
---|
4579 | (ctxt->sax->unparsedEntityDecl != NULL)) { |
---|
4580 | ctxt->sax->unparsedEntityDecl(ctxt->userData, |
---|
4581 | name, literal, URI, ndata); |
---|
4582 | } |
---|
4583 | } else if (xmlStrEqual(contmod, BAD_CAST"CDATA")) { |
---|
4584 | if ((ctxt->sax != NULL) && |
---|
4585 | (ctxt->sax->warning != NULL)) |
---|
4586 | ctxt->sax->warning(ctxt->userData, |
---|
4587 | "CDATA entities are not supported\n"); |
---|
4588 | SKIP_BLANKS; |
---|
4589 | ndata = xmlParseName(ctxt); |
---|
4590 | if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && |
---|
4591 | (ctxt->sax->unparsedEntityDecl != NULL)) { |
---|
4592 | ctxt->sax->unparsedEntityDecl(ctxt->userData, |
---|
4593 | name, literal, URI, ndata); |
---|
4594 | } |
---|
4595 | } |
---|
4596 | xmlFree(contmod); |
---|
4597 | } |
---|
4598 | } else { |
---|
4599 | if ((ctxt->sax != NULL) && |
---|
4600 | (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL)) |
---|
4601 | ctxt->sax->entityDecl(ctxt->userData, name, |
---|
4602 | XML_EXTERNAL_GENERAL_PARSED_ENTITY, |
---|
4603 | literal, URI, NULL); |
---|
4604 | } |
---|
4605 | } |
---|
4606 | } |
---|
4607 | SKIP_BLANKS; |
---|
4608 | if (RAW != '>') { |
---|
4609 | ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; |
---|
4610 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4611 | ctxt->sax->error(ctxt->userData, |
---|
4612 | "docbParseEntityDecl: entity %s not terminated\n", name); |
---|
4613 | ctxt->wellFormed = 0; |
---|
4614 | ctxt->disableSAX = 1; |
---|
4615 | } else { |
---|
4616 | if (input != ctxt->input) { |
---|
4617 | ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; |
---|
4618 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4619 | ctxt->sax->error(ctxt->userData, |
---|
4620 | "Entity declaration doesn't start and stop in the same entity\n"); |
---|
4621 | ctxt->wellFormed = 0; |
---|
4622 | ctxt->disableSAX = 1; |
---|
4623 | } |
---|
4624 | NEXT; |
---|
4625 | } |
---|
4626 | if (orig != NULL) { |
---|
4627 | /* |
---|
4628 | * Ugly mechanism to save the raw entity value. |
---|
4629 | */ |
---|
4630 | xmlEntityPtr cur = NULL; |
---|
4631 | |
---|
4632 | if (isParameter) { |
---|
4633 | if ((ctxt->sax != NULL) && |
---|
4634 | (ctxt->sax->getParameterEntity != NULL)) |
---|
4635 | cur = ctxt->sax->getParameterEntity(ctxt->userData, name); |
---|
4636 | } else { |
---|
4637 | if ((ctxt->sax != NULL) && |
---|
4638 | (ctxt->sax->getEntity != NULL)) |
---|
4639 | cur = ctxt->sax->getEntity(ctxt->userData, name); |
---|
4640 | } |
---|
4641 | if (cur != NULL) { |
---|
4642 | if (cur->orig != NULL) |
---|
4643 | xmlFree(orig); |
---|
4644 | else |
---|
4645 | cur->orig = orig; |
---|
4646 | } else |
---|
4647 | xmlFree(orig); |
---|
4648 | } |
---|
4649 | if (name != NULL) xmlFree(name); |
---|
4650 | if (value != NULL) xmlFree(value); |
---|
4651 | if (URI != NULL) xmlFree(URI); |
---|
4652 | if (literal != NULL) xmlFree(literal); |
---|
4653 | if (ndata != NULL) xmlFree(ndata); |
---|
4654 | } |
---|
4655 | } |
---|
4656 | |
---|
4657 | /** |
---|
4658 | * docbParseMarkupDecl: |
---|
4659 | * @ctxt: an SGML parser context |
---|
4660 | * |
---|
4661 | * parse Markup declarations |
---|
4662 | * |
---|
4663 | * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | |
---|
4664 | * NotationDecl | PI | Comment |
---|
4665 | */ |
---|
4666 | static void |
---|
4667 | docbParseMarkupDecl(xmlParserCtxtPtr ctxt) { |
---|
4668 | GROW; |
---|
4669 | xmlParseElementDecl(ctxt); |
---|
4670 | xmlParseAttributeListDecl(ctxt); |
---|
4671 | docbParseEntityDecl(ctxt); |
---|
4672 | xmlParseNotationDecl(ctxt); |
---|
4673 | docbParsePI(ctxt); |
---|
4674 | xmlParseComment(ctxt); |
---|
4675 | /* |
---|
4676 | * This is only for internal subset. On external entities, |
---|
4677 | * the replacement is done before parsing stage |
---|
4678 | */ |
---|
4679 | if ((ctxt->external == 0) && (ctxt->inputNr == 1)) |
---|
4680 | xmlParsePEReference(ctxt); |
---|
4681 | ctxt->instate = XML_PARSER_DTD; |
---|
4682 | } |
---|
4683 | |
---|
4684 | /** |
---|
4685 | * docbParseInternalSubset: |
---|
4686 | * @ctxt: an SGML parser context |
---|
4687 | * |
---|
4688 | * parse the internal subset declaration |
---|
4689 | * |
---|
4690 | * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' |
---|
4691 | */ |
---|
4692 | |
---|
4693 | static void |
---|
4694 | docbParseInternalSubset(xmlParserCtxtPtr ctxt) { |
---|
4695 | /* |
---|
4696 | * Is there any DTD definition ? |
---|
4697 | */ |
---|
4698 | if (RAW == '[') { |
---|
4699 | ctxt->instate = XML_PARSER_DTD; |
---|
4700 | NEXT; |
---|
4701 | /* |
---|
4702 | * Parse the succession of Markup declarations and |
---|
4703 | * PEReferences. |
---|
4704 | * Subsequence (markupdecl | PEReference | S)* |
---|
4705 | */ |
---|
4706 | while (RAW != ']') { |
---|
4707 | const xmlChar *check = CUR_PTR; |
---|
4708 | int cons = ctxt->input->consumed; |
---|
4709 | |
---|
4710 | SKIP_BLANKS; |
---|
4711 | docbParseMarkupDecl(ctxt); |
---|
4712 | xmlParsePEReference(ctxt); |
---|
4713 | |
---|
4714 | /* |
---|
4715 | * Pop-up of finished entities. |
---|
4716 | */ |
---|
4717 | while ((RAW == 0) && (ctxt->inputNr > 1)) |
---|
4718 | xmlPopInput(ctxt); |
---|
4719 | |
---|
4720 | if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { |
---|
4721 | ctxt->errNo = XML_ERR_INTERNAL_ERROR; |
---|
4722 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4723 | ctxt->sax->error(ctxt->userData, |
---|
4724 | "docbParseInternalSubset: error detected in Markup declaration\n"); |
---|
4725 | ctxt->wellFormed = 0; |
---|
4726 | ctxt->disableSAX = 1; |
---|
4727 | break; |
---|
4728 | } |
---|
4729 | } |
---|
4730 | if (RAW == ']') { |
---|
4731 | NEXT; |
---|
4732 | SKIP_BLANKS; |
---|
4733 | } |
---|
4734 | } |
---|
4735 | |
---|
4736 | /* |
---|
4737 | * We should be at the end of the DOCTYPE declaration. |
---|
4738 | */ |
---|
4739 | if (RAW != '>') { |
---|
4740 | ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; |
---|
4741 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4742 | ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n"); |
---|
4743 | ctxt->wellFormed = 0; |
---|
4744 | ctxt->disableSAX = 1; |
---|
4745 | } |
---|
4746 | NEXT; |
---|
4747 | } |
---|
4748 | |
---|
4749 | /** |
---|
4750 | * docbParseMisc: |
---|
4751 | * @ctxt: an XML parser context |
---|
4752 | * |
---|
4753 | * parse an XML Misc* optional field. |
---|
4754 | * |
---|
4755 | * [27] Misc ::= Comment | PI | S |
---|
4756 | */ |
---|
4757 | |
---|
4758 | static void |
---|
4759 | docbParseMisc(xmlParserCtxtPtr ctxt) { |
---|
4760 | while (((RAW == '<') && (NXT(1) == '?')) || |
---|
4761 | ((RAW == '<') && (NXT(1) == '!') && |
---|
4762 | (NXT(2) == '-') && (NXT(3) == '-')) || |
---|
4763 | IS_BLANK(CUR)) { |
---|
4764 | if ((RAW == '<') && (NXT(1) == '?')) { |
---|
4765 | docbParsePI(ctxt); |
---|
4766 | } else if (IS_BLANK(CUR)) { |
---|
4767 | NEXT; |
---|
4768 | } else |
---|
4769 | xmlParseComment(ctxt); |
---|
4770 | } |
---|
4771 | } |
---|
4772 | |
---|
4773 | /** |
---|
4774 | * docbParseDocument : |
---|
4775 | * @ctxt: an SGML parser context |
---|
4776 | * |
---|
4777 | * parse an SGML document (and build a tree if using the standard SAX |
---|
4778 | * interface). |
---|
4779 | * |
---|
4780 | * Returns 0, -1 in case of error. the parser context is augmented |
---|
4781 | * as a result of the parsing. |
---|
4782 | */ |
---|
4783 | |
---|
4784 | int |
---|
4785 | docbParseDocument(docbParserCtxtPtr ctxt) { |
---|
4786 | xmlChar start[4]; |
---|
4787 | xmlCharEncoding enc; |
---|
4788 | xmlDtdPtr dtd; |
---|
4789 | |
---|
4790 | docbDefaultSAXHandlerInit(); |
---|
4791 | ctxt->html = 2; |
---|
4792 | |
---|
4793 | GROW; |
---|
4794 | /* |
---|
4795 | * SAX: beginning of the document processing. |
---|
4796 | */ |
---|
4797 | if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
---|
4798 | ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); |
---|
4799 | |
---|
4800 | /* |
---|
4801 | * Get the 4 first bytes and decode the charset |
---|
4802 | * if enc != XML_CHAR_ENCODING_NONE |
---|
4803 | * plug some encoding conversion routines. |
---|
4804 | */ |
---|
4805 | start[0] = RAW; |
---|
4806 | start[1] = NXT(1); |
---|
4807 | start[2] = NXT(2); |
---|
4808 | start[3] = NXT(3); |
---|
4809 | enc = xmlDetectCharEncoding(start, 4); |
---|
4810 | if (enc != XML_CHAR_ENCODING_NONE) { |
---|
4811 | xmlSwitchEncoding(ctxt, enc); |
---|
4812 | } |
---|
4813 | |
---|
4814 | /* |
---|
4815 | * Wipe out everything which is before the first '<' |
---|
4816 | */ |
---|
4817 | SKIP_BLANKS; |
---|
4818 | if (CUR == 0) { |
---|
4819 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
4820 | ctxt->sax->error(ctxt->userData, "Document is empty\n"); |
---|
4821 | ctxt->wellFormed = 0; |
---|
4822 | } |
---|
4823 | |
---|
4824 | if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX)) |
---|
4825 | ctxt->sax->startDocument(ctxt->userData); |
---|
4826 | |
---|
4827 | |
---|
4828 | /* |
---|
4829 | * The Misc part of the Prolog |
---|
4830 | */ |
---|
4831 | GROW; |
---|
4832 | docbParseMisc(ctxt); |
---|
4833 | |
---|
4834 | /* |
---|
4835 | * Then possibly doc type declaration(s) and more Misc |
---|
4836 | * (doctypedecl Misc*)? |
---|
4837 | */ |
---|
4838 | GROW; |
---|
4839 | if ((RAW == '<') && (NXT(1) == '!') && |
---|
4840 | (UPP(2) == 'D') && (UPP(3) == 'O') && |
---|
4841 | (UPP(4) == 'C') && (UPP(5) == 'T') && |
---|
4842 | (UPP(6) == 'Y') && (UPP(7) == 'P') && |
---|
4843 | (UPP(8) == 'E')) { |
---|
4844 | |
---|
4845 | ctxt->inSubset = 1; |
---|
4846 | docbParseDocTypeDecl(ctxt); |
---|
4847 | if (RAW == '[') { |
---|
4848 | ctxt->instate = XML_PARSER_DTD; |
---|
4849 | docbParseInternalSubset(ctxt); |
---|
4850 | } |
---|
4851 | |
---|
4852 | /* |
---|
4853 | * Create and update the external subset. |
---|
4854 | */ |
---|
4855 | ctxt->inSubset = 2; |
---|
4856 | if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && |
---|
4857 | (!ctxt->disableSAX)) |
---|
4858 | ctxt->sax->internalSubset(ctxt->userData, ctxt->intSubName, |
---|
4859 | ctxt->extSubSystem, ctxt->extSubURI); |
---|
4860 | ctxt->inSubset = 0; |
---|
4861 | |
---|
4862 | |
---|
4863 | ctxt->instate = XML_PARSER_PROLOG; |
---|
4864 | docbParseMisc(ctxt); |
---|
4865 | } |
---|
4866 | |
---|
4867 | /* |
---|
4868 | * Time to start parsing the tree itself |
---|
4869 | */ |
---|
4870 | docbParseContent(ctxt); |
---|
4871 | |
---|
4872 | /* |
---|
4873 | * autoclose |
---|
4874 | */ |
---|
4875 | if (CUR == 0) |
---|
4876 | docbAutoClose(ctxt, NULL); |
---|
4877 | |
---|
4878 | |
---|
4879 | /* |
---|
4880 | * SAX: end of the document processing. |
---|
4881 | */ |
---|
4882 | if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
---|
4883 | ctxt->sax->endDocument(ctxt->userData); |
---|
4884 | |
---|
4885 | if (ctxt->myDoc != NULL) { |
---|
4886 | dtd = ctxt->myDoc->intSubset; |
---|
4887 | ctxt->myDoc->standalone = -1; |
---|
4888 | if (dtd == NULL) |
---|
4889 | ctxt->myDoc->intSubset = |
---|
4890 | xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", |
---|
4891 | BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN", |
---|
4892 | BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"); |
---|
4893 | } |
---|
4894 | if (! ctxt->wellFormed) return(-1); |
---|
4895 | return(0); |
---|
4896 | } |
---|
4897 | |
---|
4898 | |
---|
4899 | /************************************************************************ |
---|
4900 | * * |
---|
4901 | * Parser contexts handling * |
---|
4902 | * * |
---|
4903 | ************************************************************************/ |
---|
4904 | |
---|
4905 | /** |
---|
4906 | * docbInitParserCtxt: |
---|
4907 | * @ctxt: an SGML parser context |
---|
4908 | * |
---|
4909 | * Initialize a parser context |
---|
4910 | */ |
---|
4911 | |
---|
4912 | static void |
---|
4913 | docbInitParserCtxt(docbParserCtxtPtr ctxt) |
---|
4914 | { |
---|
4915 | docbSAXHandler *sax; |
---|
4916 | |
---|
4917 | if (ctxt == NULL) return; |
---|
4918 | memset(ctxt, 0, sizeof(docbParserCtxt)); |
---|
4919 | |
---|
4920 | sax = (docbSAXHandler *) xmlMalloc(sizeof(docbSAXHandler)); |
---|
4921 | if (sax == NULL) { |
---|
4922 | xmlGenericError(xmlGenericErrorContext, |
---|
4923 | "docbInitParserCtxt: out of memory\n"); |
---|
4924 | } |
---|
4925 | memset(sax, 0, sizeof(docbSAXHandler)); |
---|
4926 | |
---|
4927 | /* Allocate the Input stack */ |
---|
4928 | ctxt->inputTab = (docbParserInputPtr *) |
---|
4929 | xmlMalloc(5 * sizeof(docbParserInputPtr)); |
---|
4930 | if (ctxt->inputTab == NULL) { |
---|
4931 | xmlGenericError(xmlGenericErrorContext, |
---|
4932 | "docbInitParserCtxt: out of memory\n"); |
---|
4933 | } |
---|
4934 | ctxt->inputNr = 0; |
---|
4935 | ctxt->inputMax = 5; |
---|
4936 | ctxt->input = NULL; |
---|
4937 | ctxt->version = NULL; |
---|
4938 | ctxt->encoding = NULL; |
---|
4939 | ctxt->standalone = -1; |
---|
4940 | ctxt->instate = XML_PARSER_START; |
---|
4941 | |
---|
4942 | /* Allocate the Node stack */ |
---|
4943 | ctxt->nodeTab = (docbNodePtr *) xmlMalloc(10 * sizeof(docbNodePtr)); |
---|
4944 | ctxt->nodeNr = 0; |
---|
4945 | ctxt->nodeMax = 10; |
---|
4946 | ctxt->node = NULL; |
---|
4947 | |
---|
4948 | /* Allocate the Name stack */ |
---|
4949 | ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); |
---|
4950 | ctxt->nameNr = 0; |
---|
4951 | ctxt->nameMax = 10; |
---|
4952 | ctxt->name = NULL; |
---|
4953 | |
---|
4954 | if (sax == NULL) ctxt->sax = &docbDefaultSAXHandler; |
---|
4955 | else { |
---|
4956 | ctxt->sax = sax; |
---|
4957 | memcpy(sax, &docbDefaultSAXHandler, sizeof(docbSAXHandler)); |
---|
4958 | } |
---|
4959 | ctxt->userData = ctxt; |
---|
4960 | ctxt->myDoc = NULL; |
---|
4961 | ctxt->wellFormed = 1; |
---|
4962 | ctxt->linenumbers = xmlLineNumbersDefaultValue; |
---|
4963 | ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; |
---|
4964 | ctxt->html = 2; |
---|
4965 | ctxt->record_info = 0; |
---|
4966 | ctxt->validate = 0; |
---|
4967 | ctxt->nbChars = 0; |
---|
4968 | ctxt->checkIndex = 0; |
---|
4969 | xmlInitNodeInfoSeq(&ctxt->node_seq); |
---|
4970 | } |
---|
4971 | |
---|
4972 | /** |
---|
4973 | * docbFreeParserCtxt: |
---|
4974 | * @ctxt: an SGML parser context |
---|
4975 | * |
---|
4976 | * Free all the memory used by a parser context. However the parsed |
---|
4977 | * document in ctxt->myDoc is not freed. |
---|
4978 | */ |
---|
4979 | |
---|
4980 | void |
---|
4981 | docbFreeParserCtxt(docbParserCtxtPtr ctxt) |
---|
4982 | { |
---|
4983 | xmlFreeParserCtxt(ctxt); |
---|
4984 | } |
---|
4985 | |
---|
4986 | /** |
---|
4987 | * docbCreateDocParserCtxt : |
---|
4988 | * @cur: a pointer to an array of xmlChar |
---|
4989 | * @encoding: the SGML document encoding, or NULL |
---|
4990 | * |
---|
4991 | * Create a parser context for an SGML document. |
---|
4992 | * |
---|
4993 | * Returns the new parser context or NULL |
---|
4994 | */ |
---|
4995 | static docbParserCtxtPtr |
---|
4996 | docbCreateDocParserCtxt(xmlChar *cur, const char *encoding ATTRIBUTE_UNUSED) { |
---|
4997 | docbParserCtxtPtr ctxt; |
---|
4998 | docbParserInputPtr input; |
---|
4999 | /* sgmlCharEncoding enc; */ |
---|
5000 | |
---|
5001 | ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt)); |
---|
5002 | if (ctxt == NULL) { |
---|
5003 | perror("malloc"); |
---|
5004 | return(NULL); |
---|
5005 | } |
---|
5006 | docbInitParserCtxt(ctxt); |
---|
5007 | input = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput)); |
---|
5008 | if (input == NULL) { |
---|
5009 | perror("malloc"); |
---|
5010 | xmlFree(ctxt); |
---|
5011 | return(NULL); |
---|
5012 | } |
---|
5013 | memset(input, 0, sizeof(docbParserInput)); |
---|
5014 | |
---|
5015 | input->line = 1; |
---|
5016 | input->col = 1; |
---|
5017 | input->base = cur; |
---|
5018 | input->cur = cur; |
---|
5019 | |
---|
5020 | inputPush(ctxt, input); |
---|
5021 | return(ctxt); |
---|
5022 | } |
---|
5023 | |
---|
5024 | /************************************************************************ |
---|
5025 | * * |
---|
5026 | * Progressive parsing interfaces * |
---|
5027 | * * |
---|
5028 | ************************************************************************/ |
---|
5029 | |
---|
5030 | /** |
---|
5031 | * docbParseLookupSequence: |
---|
5032 | * @ctxt: an SGML parser context |
---|
5033 | * @first: the first char to lookup |
---|
5034 | * @next: the next char to lookup or zero |
---|
5035 | * @third: the next char to lookup or zero |
---|
5036 | * |
---|
5037 | * Try to find if a sequence (first, next, third) or just (first next) or |
---|
5038 | * (first) is available in the input stream. |
---|
5039 | * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
---|
5040 | * to avoid rescanning sequences of bytes, it DOES change the state of the |
---|
5041 | * parser, do not use liberally. |
---|
5042 | * This is basically similar to xmlParseLookupSequence() |
---|
5043 | * |
---|
5044 | * Returns the index to the current parsing point if the full sequence |
---|
5045 | * is available, -1 otherwise. |
---|
5046 | */ |
---|
5047 | static int |
---|
5048 | docbParseLookupSequence(docbParserCtxtPtr ctxt, xmlChar first, |
---|
5049 | xmlChar next, xmlChar third) { |
---|
5050 | int base, len; |
---|
5051 | docbParserInputPtr in; |
---|
5052 | const xmlChar *buf; |
---|
5053 | |
---|
5054 | in = ctxt->input; |
---|
5055 | if (in == NULL) return(-1); |
---|
5056 | base = in->cur - in->base; |
---|
5057 | if (base < 0) return(-1); |
---|
5058 | if (ctxt->checkIndex > base) |
---|
5059 | base = ctxt->checkIndex; |
---|
5060 | if (in->buf == NULL) { |
---|
5061 | buf = in->base; |
---|
5062 | len = in->length; |
---|
5063 | } else { |
---|
5064 | buf = in->buf->buffer->content; |
---|
5065 | len = in->buf->buffer->use; |
---|
5066 | } |
---|
5067 | /* take into account the sequence length */ |
---|
5068 | if (third) len -= 2; |
---|
5069 | else if (next) len --; |
---|
5070 | for (;base < len;base++) { |
---|
5071 | if (buf[base] == first) { |
---|
5072 | if (third != 0) { |
---|
5073 | if ((buf[base + 1] != next) || |
---|
5074 | (buf[base + 2] != third)) continue; |
---|
5075 | } else if (next != 0) { |
---|
5076 | if (buf[base + 1] != next) continue; |
---|
5077 | } |
---|
5078 | ctxt->checkIndex = 0; |
---|
5079 | #ifdef DEBUG_PUSH |
---|
5080 | if (next == 0) |
---|
5081 | xmlGenericError(xmlGenericErrorContext, |
---|
5082 | "HPP: lookup '%c' found at %d\n", |
---|
5083 | first, base); |
---|
5084 | else if (third == 0) |
---|
5085 | xmlGenericError(xmlGenericErrorContext, |
---|
5086 | "HPP: lookup '%c%c' found at %d\n", |
---|
5087 | first, next, base); |
---|
5088 | else |
---|
5089 | xmlGenericError(xmlGenericErrorContext, |
---|
5090 | "HPP: lookup '%c%c%c' found at %d\n", |
---|
5091 | first, next, third, base); |
---|
5092 | #endif |
---|
5093 | return(base - (in->cur - in->base)); |
---|
5094 | } |
---|
5095 | } |
---|
5096 | ctxt->checkIndex = base; |
---|
5097 | #ifdef DEBUG_PUSH |
---|
5098 | if (next == 0) |
---|
5099 | xmlGenericError(xmlGenericErrorContext, |
---|
5100 | "HPP: lookup '%c' failed\n", first); |
---|
5101 | else if (third == 0) |
---|
5102 | xmlGenericError(xmlGenericErrorContext, |
---|
5103 | "HPP: lookup '%c%c' failed\n", first, next); |
---|
5104 | else |
---|
5105 | xmlGenericError(xmlGenericErrorContext, |
---|
5106 | "HPP: lookup '%c%c%c' failed\n", first, next, third); |
---|
5107 | #endif |
---|
5108 | return(-1); |
---|
5109 | } |
---|
5110 | |
---|
5111 | /** |
---|
5112 | * docbParseTryOrFinish: |
---|
5113 | * @ctxt: an SGML parser context |
---|
5114 | * @terminate: last chunk indicator |
---|
5115 | * |
---|
5116 | * Try to progress on parsing |
---|
5117 | * |
---|
5118 | * Returns zero if no parsing was possible |
---|
5119 | */ |
---|
5120 | static int |
---|
5121 | docbParseTryOrFinish(docbParserCtxtPtr ctxt, int terminate) { |
---|
5122 | int ret = 0; |
---|
5123 | docbParserInputPtr in; |
---|
5124 | int avail = 0; |
---|
5125 | xmlChar cur, next; |
---|
5126 | |
---|
5127 | #ifdef DEBUG_PUSH |
---|
5128 | switch (ctxt->instate) { |
---|
5129 | case XML_PARSER_EOF: |
---|
5130 | xmlGenericError(xmlGenericErrorContext, |
---|
5131 | "HPP: try EOF\n"); break; |
---|
5132 | case XML_PARSER_START: |
---|
5133 | xmlGenericError(xmlGenericErrorContext, |
---|
5134 | "HPP: try START\n"); break; |
---|
5135 | case XML_PARSER_MISC: |
---|
5136 | xmlGenericError(xmlGenericErrorContext, |
---|
5137 | "HPP: try MISC\n");break; |
---|
5138 | case XML_PARSER_COMMENT: |
---|
5139 | xmlGenericError(xmlGenericErrorContext, |
---|
5140 | "HPP: try COMMENT\n");break; |
---|
5141 | case XML_PARSER_PROLOG: |
---|
5142 | xmlGenericError(xmlGenericErrorContext, |
---|
5143 | "HPP: try PROLOG\n");break; |
---|
5144 | case XML_PARSER_START_TAG: |
---|
5145 | xmlGenericError(xmlGenericErrorContext, |
---|
5146 | "HPP: try START_TAG\n");break; |
---|
5147 | case XML_PARSER_CONTENT: |
---|
5148 | xmlGenericError(xmlGenericErrorContext, |
---|
5149 | "HPP: try CONTENT\n");break; |
---|
5150 | case XML_PARSER_CDATA_SECTION: |
---|
5151 | xmlGenericError(xmlGenericErrorContext, |
---|
5152 | "HPP: try CDATA_SECTION\n");break; |
---|
5153 | case XML_PARSER_END_TAG: |
---|
5154 | xmlGenericError(xmlGenericErrorContext, |
---|
5155 | "HPP: try END_TAG\n");break; |
---|
5156 | case XML_PARSER_ENTITY_DECL: |
---|
5157 | xmlGenericError(xmlGenericErrorContext, |
---|
5158 | "HPP: try ENTITY_DECL\n");break; |
---|
5159 | case XML_PARSER_ENTITY_VALUE: |
---|
5160 | xmlGenericError(xmlGenericErrorContext, |
---|
5161 | "HPP: try ENTITY_VALUE\n");break; |
---|
5162 | case XML_PARSER_ATTRIBUTE_VALUE: |
---|
5163 | xmlGenericError(xmlGenericErrorContext, |
---|
5164 | "HPP: try ATTRIBUTE_VALUE\n");break; |
---|
5165 | case XML_PARSER_DTD: |
---|
5166 | xmlGenericError(xmlGenericErrorContext, |
---|
5167 | "HPP: try DTD\n");break; |
---|
5168 | case XML_PARSER_EPILOG: |
---|
5169 | xmlGenericError(xmlGenericErrorContext, |
---|
5170 | "HPP: try EPILOG\n");break; |
---|
5171 | case XML_PARSER_PI: |
---|
5172 | xmlGenericError(xmlGenericErrorContext, |
---|
5173 | "HPP: try PI\n");break; |
---|
5174 | } |
---|
5175 | #endif |
---|
5176 | |
---|
5177 | while (1) { |
---|
5178 | |
---|
5179 | in = ctxt->input; |
---|
5180 | if (in == NULL) break; |
---|
5181 | if (in->buf == NULL) |
---|
5182 | avail = in->length - (in->cur - in->base); |
---|
5183 | else |
---|
5184 | avail = in->buf->buffer->use - (in->cur - in->base); |
---|
5185 | if ((avail == 0) && (terminate)) { |
---|
5186 | docbAutoClose(ctxt, NULL); |
---|
5187 | if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
---|
5188 | /* |
---|
5189 | * SAX: end of the document processing. |
---|
5190 | */ |
---|
5191 | ctxt->instate = XML_PARSER_EOF; |
---|
5192 | if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
---|
5193 | ctxt->sax->endDocument(ctxt->userData); |
---|
5194 | } |
---|
5195 | } |
---|
5196 | if (avail < 1) |
---|
5197 | goto done; |
---|
5198 | switch (ctxt->instate) { |
---|
5199 | case XML_PARSER_EOF: |
---|
5200 | /* |
---|
5201 | * Document parsing is done ! |
---|
5202 | */ |
---|
5203 | goto done; |
---|
5204 | case XML_PARSER_START: |
---|
5205 | /* |
---|
5206 | * Very first chars read from the document flow. |
---|
5207 | */ |
---|
5208 | cur = in->cur[0]; |
---|
5209 | if (IS_BLANK(cur)) { |
---|
5210 | SKIP_BLANKS; |
---|
5211 | if (in->buf == NULL) |
---|
5212 | avail = in->length - (in->cur - in->base); |
---|
5213 | else |
---|
5214 | avail = in->buf->buffer->use - (in->cur - in->base); |
---|
5215 | } |
---|
5216 | if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
---|
5217 | ctxt->sax->setDocumentLocator(ctxt->userData, |
---|
5218 | &xmlDefaultSAXLocator); |
---|
5219 | if ((ctxt->sax) && (ctxt->sax->startDocument) && |
---|
5220 | (!ctxt->disableSAX)) |
---|
5221 | ctxt->sax->startDocument(ctxt->userData); |
---|
5222 | |
---|
5223 | cur = in->cur[0]; |
---|
5224 | next = in->cur[1]; |
---|
5225 | if ((cur == '<') && (next == '!') && |
---|
5226 | (UPP(2) == 'D') && (UPP(3) == 'O') && |
---|
5227 | (UPP(4) == 'C') && (UPP(5) == 'T') && |
---|
5228 | (UPP(6) == 'Y') && (UPP(7) == 'P') && |
---|
5229 | (UPP(8) == 'E')) { |
---|
5230 | if ((!terminate) && |
---|
5231 | (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) |
---|
5232 | goto done; |
---|
5233 | #ifdef DEBUG_PUSH |
---|
5234 | xmlGenericError(xmlGenericErrorContext, |
---|
5235 | "HPP: Parsing internal subset\n"); |
---|
5236 | #endif |
---|
5237 | docbParseDocTypeDecl(ctxt); |
---|
5238 | ctxt->instate = XML_PARSER_PROLOG; |
---|
5239 | #ifdef DEBUG_PUSH |
---|
5240 | xmlGenericError(xmlGenericErrorContext, |
---|
5241 | "HPP: entering PROLOG\n"); |
---|
5242 | #endif |
---|
5243 | } else { |
---|
5244 | ctxt->instate = XML_PARSER_MISC; |
---|
5245 | } |
---|
5246 | #ifdef DEBUG_PUSH |
---|
5247 | xmlGenericError(xmlGenericErrorContext, |
---|
5248 | "HPP: entering MISC\n"); |
---|
5249 | #endif |
---|
5250 | break; |
---|
5251 | case XML_PARSER_MISC: |
---|
5252 | SKIP_BLANKS; |
---|
5253 | if (in->buf == NULL) |
---|
5254 | avail = in->length - (in->cur - in->base); |
---|
5255 | else |
---|
5256 | avail = in->buf->buffer->use - (in->cur - in->base); |
---|
5257 | if (avail < 2) |
---|
5258 | goto done; |
---|
5259 | cur = in->cur[0]; |
---|
5260 | next = in->cur[1]; |
---|
5261 | if ((cur == '<') && (next == '!') && |
---|
5262 | (in->cur[2] == '-') && (in->cur[3] == '-')) { |
---|
5263 | if ((!terminate) && |
---|
5264 | (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) |
---|
5265 | goto done; |
---|
5266 | #ifdef DEBUG_PUSH |
---|
5267 | xmlGenericError(xmlGenericErrorContext, |
---|
5268 | "HPP: Parsing Comment\n"); |
---|
5269 | #endif |
---|
5270 | docbParseComment(ctxt); |
---|
5271 | ctxt->instate = XML_PARSER_MISC; |
---|
5272 | } else if ((cur == '<') && (next == '!') && |
---|
5273 | (UPP(2) == 'D') && (UPP(3) == 'O') && |
---|
5274 | (UPP(4) == 'C') && (UPP(5) == 'T') && |
---|
5275 | (UPP(6) == 'Y') && (UPP(7) == 'P') && |
---|
5276 | (UPP(8) == 'E')) { |
---|
5277 | if ((!terminate) && |
---|
5278 | (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) |
---|
5279 | goto done; |
---|
5280 | #ifdef DEBUG_PUSH |
---|
5281 | xmlGenericError(xmlGenericErrorContext, |
---|
5282 | "HPP: Parsing internal subset\n"); |
---|
5283 | #endif |
---|
5284 | docbParseDocTypeDecl(ctxt); |
---|
5285 | ctxt->instate = XML_PARSER_PROLOG; |
---|
5286 | #ifdef DEBUG_PUSH |
---|
5287 | xmlGenericError(xmlGenericErrorContext, |
---|
5288 | "HPP: entering PROLOG\n"); |
---|
5289 | #endif |
---|
5290 | } else if ((cur == '<') && (next == '!') && |
---|
5291 | (avail < 9)) { |
---|
5292 | goto done; |
---|
5293 | } else { |
---|
5294 | ctxt->instate = XML_PARSER_START_TAG; |
---|
5295 | #ifdef DEBUG_PUSH |
---|
5296 | xmlGenericError(xmlGenericErrorContext, |
---|
5297 | "HPP: entering START_TAG\n"); |
---|
5298 | #endif |
---|
5299 | } |
---|
5300 | break; |
---|
5301 | case XML_PARSER_PROLOG: |
---|
5302 | SKIP_BLANKS; |
---|
5303 | if (in->buf == NULL) |
---|
5304 | avail = in->length - (in->cur - in->base); |
---|
5305 | else |
---|
5306 | avail = in->buf->buffer->use - (in->cur - in->base); |
---|
5307 | if (avail < 2) |
---|
5308 | goto done; |
---|
5309 | cur = in->cur[0]; |
---|
5310 | next = in->cur[1]; |
---|
5311 | if ((cur == '<') && (next == '!') && |
---|
5312 | (in->cur[2] == '-') && (in->cur[3] == '-')) { |
---|
5313 | if ((!terminate) && |
---|
5314 | (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) |
---|
5315 | goto done; |
---|
5316 | #ifdef DEBUG_PUSH |
---|
5317 | xmlGenericError(xmlGenericErrorContext, |
---|
5318 | "HPP: Parsing Comment\n"); |
---|
5319 | #endif |
---|
5320 | docbParseComment(ctxt); |
---|
5321 | ctxt->instate = XML_PARSER_PROLOG; |
---|
5322 | } else if ((cur == '<') && (next == '!') && |
---|
5323 | (avail < 4)) { |
---|
5324 | goto done; |
---|
5325 | } else { |
---|
5326 | ctxt->instate = XML_PARSER_START_TAG; |
---|
5327 | #ifdef DEBUG_PUSH |
---|
5328 | xmlGenericError(xmlGenericErrorContext, |
---|
5329 | "HPP: entering START_TAG\n"); |
---|
5330 | #endif |
---|
5331 | } |
---|
5332 | break; |
---|
5333 | case XML_PARSER_EPILOG: |
---|
5334 | if (in->buf == NULL) |
---|
5335 | avail = in->length - (in->cur - in->base); |
---|
5336 | else |
---|
5337 | avail = in->buf->buffer->use - (in->cur - in->base); |
---|
5338 | if (avail < 1) |
---|
5339 | goto done; |
---|
5340 | cur = in->cur[0]; |
---|
5341 | if (IS_BLANK(cur)) { |
---|
5342 | docbParseCharData(ctxt); |
---|
5343 | goto done; |
---|
5344 | } |
---|
5345 | if (avail < 2) |
---|
5346 | goto done; |
---|
5347 | next = in->cur[1]; |
---|
5348 | if ((cur == '<') && (next == '!') && |
---|
5349 | (in->cur[2] == '-') && (in->cur[3] == '-')) { |
---|
5350 | if ((!terminate) && |
---|
5351 | (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) |
---|
5352 | goto done; |
---|
5353 | #ifdef DEBUG_PUSH |
---|
5354 | xmlGenericError(xmlGenericErrorContext, |
---|
5355 | "HPP: Parsing Comment\n"); |
---|
5356 | #endif |
---|
5357 | docbParseComment(ctxt); |
---|
5358 | ctxt->instate = XML_PARSER_EPILOG; |
---|
5359 | } else if ((cur == '<') && (next == '!') && |
---|
5360 | (avail < 4)) { |
---|
5361 | goto done; |
---|
5362 | } else { |
---|
5363 | ctxt->errNo = XML_ERR_DOCUMENT_END; |
---|
5364 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
5365 | ctxt->sax->error(ctxt->userData, |
---|
5366 | "Extra content at the end of the document\n"); |
---|
5367 | ctxt->wellFormed = 0; |
---|
5368 | ctxt->instate = XML_PARSER_EOF; |
---|
5369 | #ifdef DEBUG_PUSH |
---|
5370 | xmlGenericError(xmlGenericErrorContext, |
---|
5371 | "HPP: entering EOF\n"); |
---|
5372 | #endif |
---|
5373 | if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
---|
5374 | ctxt->sax->endDocument(ctxt->userData); |
---|
5375 | goto done; |
---|
5376 | } |
---|
5377 | break; |
---|
5378 | case XML_PARSER_START_TAG: { |
---|
5379 | xmlChar *name, *oldname; |
---|
5380 | int depth = ctxt->nameNr; |
---|
5381 | docbElemDescPtr info; |
---|
5382 | |
---|
5383 | if (avail < 2) |
---|
5384 | goto done; |
---|
5385 | cur = in->cur[0]; |
---|
5386 | if (cur != '<') { |
---|
5387 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5388 | #ifdef DEBUG_PUSH |
---|
5389 | xmlGenericError(xmlGenericErrorContext, |
---|
5390 | "HPP: entering CONTENT\n"); |
---|
5391 | #endif |
---|
5392 | break; |
---|
5393 | } |
---|
5394 | if ((!terminate) && |
---|
5395 | (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) |
---|
5396 | goto done; |
---|
5397 | |
---|
5398 | oldname = xmlStrdup(ctxt->name); |
---|
5399 | docbParseStartTag(ctxt); |
---|
5400 | name = ctxt->name; |
---|
5401 | #ifdef DEBUG |
---|
5402 | if (oldname == NULL) |
---|
5403 | xmlGenericError(xmlGenericErrorContext, |
---|
5404 | "Start of element %s\n", name); |
---|
5405 | else if (name == NULL) |
---|
5406 | xmlGenericError(xmlGenericErrorContext, |
---|
5407 | "Start of element failed, was %s\n", |
---|
5408 | oldname); |
---|
5409 | else |
---|
5410 | xmlGenericError(xmlGenericErrorContext, |
---|
5411 | "Start of element %s, was %s\n", |
---|
5412 | name, oldname); |
---|
5413 | #endif |
---|
5414 | if (((depth == ctxt->nameNr) && |
---|
5415 | (xmlStrEqual(oldname, ctxt->name))) || |
---|
5416 | (name == NULL)) { |
---|
5417 | if (CUR == '>') |
---|
5418 | NEXT; |
---|
5419 | if (oldname != NULL) |
---|
5420 | xmlFree(oldname); |
---|
5421 | break; |
---|
5422 | } |
---|
5423 | if (oldname != NULL) |
---|
5424 | xmlFree(oldname); |
---|
5425 | |
---|
5426 | /* |
---|
5427 | * Lookup the info for that element. |
---|
5428 | */ |
---|
5429 | info = docbTagLookup(name); |
---|
5430 | if (info == NULL) { |
---|
5431 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
5432 | ctxt->sax->error(ctxt->userData, "Tag %s unknown\n", |
---|
5433 | name); |
---|
5434 | ctxt->wellFormed = 0; |
---|
5435 | } else if (info->depr) { |
---|
5436 | /*************************** |
---|
5437 | if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) |
---|
5438 | ctxt->sax->warning(ctxt->userData, |
---|
5439 | "Tag %s is deprecated\n", |
---|
5440 | name); |
---|
5441 | ***************************/ |
---|
5442 | } |
---|
5443 | |
---|
5444 | /* |
---|
5445 | * Check for an Empty Element labeled the XML/SGML way |
---|
5446 | */ |
---|
5447 | if ((CUR == '/') && (NXT(1) == '>')) { |
---|
5448 | SKIP(2); |
---|
5449 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
5450 | ctxt->sax->endElement(ctxt->userData, name); |
---|
5451 | oldname = docbnamePop(ctxt); |
---|
5452 | #ifdef DEBUG |
---|
5453 | xmlGenericError(xmlGenericErrorContext,"End of tag the XML way: popping out %s\n", |
---|
5454 | oldname); |
---|
5455 | #endif |
---|
5456 | if (oldname != NULL) |
---|
5457 | xmlFree(oldname); |
---|
5458 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5459 | #ifdef DEBUG_PUSH |
---|
5460 | xmlGenericError(xmlGenericErrorContext, |
---|
5461 | "HPP: entering CONTENT\n"); |
---|
5462 | #endif |
---|
5463 | break; |
---|
5464 | } |
---|
5465 | |
---|
5466 | if (CUR == '>') { |
---|
5467 | NEXT; |
---|
5468 | } else { |
---|
5469 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
5470 | ctxt->sax->error(ctxt->userData, |
---|
5471 | "Couldn't find end of Start Tag %s\n", |
---|
5472 | name); |
---|
5473 | ctxt->wellFormed = 0; |
---|
5474 | |
---|
5475 | /* |
---|
5476 | * end of parsing of this node. |
---|
5477 | */ |
---|
5478 | if (xmlStrEqual(name, ctxt->name)) { |
---|
5479 | nodePop(ctxt); |
---|
5480 | oldname = docbnamePop(ctxt); |
---|
5481 | #ifdef DEBUG |
---|
5482 | xmlGenericError(xmlGenericErrorContext, |
---|
5483 | "End of start tag problem: popping out %s\n", oldname); |
---|
5484 | #endif |
---|
5485 | if (oldname != NULL) |
---|
5486 | xmlFree(oldname); |
---|
5487 | } |
---|
5488 | |
---|
5489 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5490 | #ifdef DEBUG_PUSH |
---|
5491 | xmlGenericError(xmlGenericErrorContext, |
---|
5492 | "HPP: entering CONTENT\n"); |
---|
5493 | #endif |
---|
5494 | break; |
---|
5495 | } |
---|
5496 | |
---|
5497 | /* |
---|
5498 | * Check for an Empty Element from DTD definition |
---|
5499 | */ |
---|
5500 | if ((info != NULL) && (info->empty)) { |
---|
5501 | if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
---|
5502 | ctxt->sax->endElement(ctxt->userData, name); |
---|
5503 | oldname = docbnamePop(ctxt); |
---|
5504 | #ifdef DEBUG |
---|
5505 | xmlGenericError(xmlGenericErrorContext,"End of empty tag %s : popping out %s\n", name, oldname); |
---|
5506 | #endif |
---|
5507 | if (oldname != NULL) |
---|
5508 | xmlFree(oldname); |
---|
5509 | } |
---|
5510 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5511 | #ifdef DEBUG_PUSH |
---|
5512 | xmlGenericError(xmlGenericErrorContext, |
---|
5513 | "HPP: entering CONTENT\n"); |
---|
5514 | #endif |
---|
5515 | break; |
---|
5516 | } |
---|
5517 | case XML_PARSER_CONTENT: { |
---|
5518 | long cons; |
---|
5519 | /* |
---|
5520 | * Handle preparsed entities and charRef |
---|
5521 | */ |
---|
5522 | if (ctxt->token != 0) { |
---|
5523 | xmlChar chr[2] = { 0 , 0 } ; |
---|
5524 | |
---|
5525 | chr[0] = (xmlChar) ctxt->token; |
---|
5526 | if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
---|
5527 | ctxt->sax->characters(ctxt->userData, chr, 1); |
---|
5528 | ctxt->token = 0; |
---|
5529 | ctxt->checkIndex = 0; |
---|
5530 | } |
---|
5531 | if ((avail == 1) && (terminate)) { |
---|
5532 | cur = in->cur[0]; |
---|
5533 | if ((cur != '<') && (cur != '&')) { |
---|
5534 | if (ctxt->sax != NULL) { |
---|
5535 | if (IS_BLANK(cur)) { |
---|
5536 | if (ctxt->sax->ignorableWhitespace != NULL) |
---|
5537 | ctxt->sax->ignorableWhitespace( |
---|
5538 | ctxt->userData, &cur, 1); |
---|
5539 | } else { |
---|
5540 | if (ctxt->sax->characters != NULL) |
---|
5541 | ctxt->sax->characters( |
---|
5542 | ctxt->userData, &cur, 1); |
---|
5543 | } |
---|
5544 | } |
---|
5545 | ctxt->token = 0; |
---|
5546 | ctxt->checkIndex = 0; |
---|
5547 | NEXT; |
---|
5548 | } |
---|
5549 | break; |
---|
5550 | } |
---|
5551 | if (avail < 2) |
---|
5552 | goto done; |
---|
5553 | cur = in->cur[0]; |
---|
5554 | next = in->cur[1]; |
---|
5555 | cons = ctxt->nbChars; |
---|
5556 | /* |
---|
5557 | * Sometimes DOCTYPE arrives in the middle of the document |
---|
5558 | */ |
---|
5559 | if ((cur == '<') && (next == '!') && |
---|
5560 | (UPP(2) == 'D') && (UPP(3) == 'O') && |
---|
5561 | (UPP(4) == 'C') && (UPP(5) == 'T') && |
---|
5562 | (UPP(6) == 'Y') && (UPP(7) == 'P') && |
---|
5563 | (UPP(8) == 'E')) { |
---|
5564 | if ((!terminate) && |
---|
5565 | (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) |
---|
5566 | goto done; |
---|
5567 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
5568 | ctxt->sax->error(ctxt->userData, |
---|
5569 | "Misplaced DOCTYPE declaration\n"); |
---|
5570 | ctxt->wellFormed = 0; |
---|
5571 | docbParseDocTypeDecl(ctxt); |
---|
5572 | } else if ((cur == '<') && (next == '!') && |
---|
5573 | (in->cur[2] == '-') && (in->cur[3] == '-')) { |
---|
5574 | if ((!terminate) && |
---|
5575 | (docbParseLookupSequence(ctxt, '-', '-', '>') < 0)) |
---|
5576 | goto done; |
---|
5577 | #ifdef DEBUG_PUSH |
---|
5578 | xmlGenericError(xmlGenericErrorContext, |
---|
5579 | "HPP: Parsing Comment\n"); |
---|
5580 | #endif |
---|
5581 | docbParseComment(ctxt); |
---|
5582 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5583 | } else if ((cur == '<') && (next == '!') && (avail < 4)) { |
---|
5584 | goto done; |
---|
5585 | } else if ((cur == '<') && (next == '/')) { |
---|
5586 | ctxt->instate = XML_PARSER_END_TAG; |
---|
5587 | ctxt->checkIndex = 0; |
---|
5588 | #ifdef DEBUG_PUSH |
---|
5589 | xmlGenericError(xmlGenericErrorContext, |
---|
5590 | "HPP: entering END_TAG\n"); |
---|
5591 | #endif |
---|
5592 | break; |
---|
5593 | } else if (cur == '<') { |
---|
5594 | ctxt->instate = XML_PARSER_START_TAG; |
---|
5595 | ctxt->checkIndex = 0; |
---|
5596 | #ifdef DEBUG_PUSH |
---|
5597 | xmlGenericError(xmlGenericErrorContext, |
---|
5598 | "HPP: entering START_TAG\n"); |
---|
5599 | #endif |
---|
5600 | break; |
---|
5601 | } else if (cur == '&') { |
---|
5602 | if ((!terminate) && |
---|
5603 | (docbParseLookupSequence(ctxt, ';', 0, 0) < 0)) |
---|
5604 | goto done; |
---|
5605 | #ifdef DEBUG_PUSH |
---|
5606 | xmlGenericError(xmlGenericErrorContext, |
---|
5607 | "HPP: Parsing Reference\n"); |
---|
5608 | #endif |
---|
5609 | /* TODO: check generation of subtrees if noent !!! */ |
---|
5610 | docbParseReference(ctxt); |
---|
5611 | } else { |
---|
5612 | /* TODO Avoid the extra copy, handle directly !!!!!! */ |
---|
5613 | /* |
---|
5614 | * Goal of the following test is : |
---|
5615 | * - minimize calls to the SAX 'character' callback |
---|
5616 | * when they are mergeable |
---|
5617 | */ |
---|
5618 | if ((ctxt->inputNr == 1) && |
---|
5619 | (avail < DOCB_PARSER_BIG_BUFFER_SIZE)) { |
---|
5620 | if ((!terminate) && |
---|
5621 | (docbParseLookupSequence(ctxt, '<', 0, 0) < 0)) |
---|
5622 | goto done; |
---|
5623 | } |
---|
5624 | ctxt->checkIndex = 0; |
---|
5625 | #ifdef DEBUG_PUSH |
---|
5626 | xmlGenericError(xmlGenericErrorContext, |
---|
5627 | "HPP: Parsing char data\n"); |
---|
5628 | #endif |
---|
5629 | docbParseCharData(ctxt); |
---|
5630 | } |
---|
5631 | if (cons == ctxt->nbChars) { |
---|
5632 | if (ctxt->node != NULL) { |
---|
5633 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
5634 | ctxt->sax->error(ctxt->userData, |
---|
5635 | "detected an error in element content\n"); |
---|
5636 | ctxt->wellFormed = 0; |
---|
5637 | NEXT; |
---|
5638 | } |
---|
5639 | break; |
---|
5640 | } |
---|
5641 | |
---|
5642 | break; |
---|
5643 | } |
---|
5644 | case XML_PARSER_END_TAG: |
---|
5645 | if (avail < 2) |
---|
5646 | goto done; |
---|
5647 | if ((!terminate) && |
---|
5648 | (docbParseLookupSequence(ctxt, '>', 0, 0) < 0)) |
---|
5649 | goto done; |
---|
5650 | docbParseEndTag(ctxt); |
---|
5651 | if (ctxt->nameNr == 0) { |
---|
5652 | ctxt->instate = XML_PARSER_EPILOG; |
---|
5653 | } else { |
---|
5654 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5655 | } |
---|
5656 | ctxt->checkIndex = 0; |
---|
5657 | #ifdef DEBUG_PUSH |
---|
5658 | xmlGenericError(xmlGenericErrorContext, |
---|
5659 | "HPP: entering CONTENT\n"); |
---|
5660 | #endif |
---|
5661 | break; |
---|
5662 | case XML_PARSER_CDATA_SECTION: |
---|
5663 | xmlGenericError(xmlGenericErrorContext, |
---|
5664 | "HPP: internal error, state == CDATA\n"); |
---|
5665 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5666 | ctxt->checkIndex = 0; |
---|
5667 | #ifdef DEBUG_PUSH |
---|
5668 | xmlGenericError(xmlGenericErrorContext, |
---|
5669 | "HPP: entering CONTENT\n"); |
---|
5670 | #endif |
---|
5671 | break; |
---|
5672 | case XML_PARSER_DTD: |
---|
5673 | xmlGenericError(xmlGenericErrorContext, |
---|
5674 | "HPP: internal error, state == DTD\n"); |
---|
5675 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5676 | ctxt->checkIndex = 0; |
---|
5677 | #ifdef DEBUG_PUSH |
---|
5678 | xmlGenericError(xmlGenericErrorContext, |
---|
5679 | "HPP: entering CONTENT\n"); |
---|
5680 | #endif |
---|
5681 | break; |
---|
5682 | case XML_PARSER_COMMENT: |
---|
5683 | xmlGenericError(xmlGenericErrorContext, |
---|
5684 | "HPP: internal error, state == COMMENT\n"); |
---|
5685 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5686 | ctxt->checkIndex = 0; |
---|
5687 | #ifdef DEBUG_PUSH |
---|
5688 | xmlGenericError(xmlGenericErrorContext, |
---|
5689 | "HPP: entering CONTENT\n"); |
---|
5690 | #endif |
---|
5691 | break; |
---|
5692 | case XML_PARSER_PI: |
---|
5693 | xmlGenericError(xmlGenericErrorContext, |
---|
5694 | "HPP: internal error, state == PI\n"); |
---|
5695 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5696 | ctxt->checkIndex = 0; |
---|
5697 | #ifdef DEBUG_PUSH |
---|
5698 | xmlGenericError(xmlGenericErrorContext, |
---|
5699 | "HPP: entering CONTENT\n"); |
---|
5700 | #endif |
---|
5701 | break; |
---|
5702 | case XML_PARSER_ENTITY_DECL: |
---|
5703 | xmlGenericError(xmlGenericErrorContext, |
---|
5704 | "HPP: internal error, state == ENTITY_DECL\n"); |
---|
5705 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5706 | ctxt->checkIndex = 0; |
---|
5707 | #ifdef DEBUG_PUSH |
---|
5708 | xmlGenericError(xmlGenericErrorContext, |
---|
5709 | "HPP: entering CONTENT\n"); |
---|
5710 | #endif |
---|
5711 | break; |
---|
5712 | case XML_PARSER_ENTITY_VALUE: |
---|
5713 | xmlGenericError(xmlGenericErrorContext, |
---|
5714 | "HPP: internal error, state == ENTITY_VALUE\n"); |
---|
5715 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5716 | ctxt->checkIndex = 0; |
---|
5717 | #ifdef DEBUG_PUSH |
---|
5718 | xmlGenericError(xmlGenericErrorContext, |
---|
5719 | "HPP: entering DTD\n"); |
---|
5720 | #endif |
---|
5721 | break; |
---|
5722 | case XML_PARSER_ATTRIBUTE_VALUE: |
---|
5723 | xmlGenericError(xmlGenericErrorContext, |
---|
5724 | "HPP: internal error, state == ATTRIBUTE_VALUE\n"); |
---|
5725 | ctxt->instate = XML_PARSER_START_TAG; |
---|
5726 | ctxt->checkIndex = 0; |
---|
5727 | #ifdef DEBUG_PUSH |
---|
5728 | xmlGenericError(xmlGenericErrorContext, |
---|
5729 | "HPP: entering START_TAG\n"); |
---|
5730 | #endif |
---|
5731 | break; |
---|
5732 | case XML_PARSER_SYSTEM_LITERAL: |
---|
5733 | xmlGenericError(xmlGenericErrorContext, |
---|
5734 | "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n"); |
---|
5735 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5736 | ctxt->checkIndex = 0; |
---|
5737 | #ifdef DEBUG_PUSH |
---|
5738 | xmlGenericError(xmlGenericErrorContext, |
---|
5739 | "HPP: entering CONTENT\n"); |
---|
5740 | #endif |
---|
5741 | break; |
---|
5742 | |
---|
5743 | case XML_PARSER_IGNORE: |
---|
5744 | xmlGenericError(xmlGenericErrorContext, |
---|
5745 | "HPP: internal error, state == XML_PARSER_IGNORE\n"); |
---|
5746 | ctxt->instate = XML_PARSER_CONTENT; |
---|
5747 | ctxt->checkIndex = 0; |
---|
5748 | #ifdef DEBUG_PUSH |
---|
5749 | xmlGenericError(xmlGenericErrorContext, |
---|
5750 | "HPP: entering CONTENT\n"); |
---|
5751 | #endif |
---|
5752 | break; |
---|
5753 | } |
---|
5754 | } |
---|
5755 | done: |
---|
5756 | if ((avail == 0) && (terminate)) { |
---|
5757 | docbAutoClose(ctxt, NULL); |
---|
5758 | if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
---|
5759 | /* |
---|
5760 | * SAX: end of the document processing. |
---|
5761 | */ |
---|
5762 | ctxt->instate = XML_PARSER_EOF; |
---|
5763 | if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
---|
5764 | ctxt->sax->endDocument(ctxt->userData); |
---|
5765 | } |
---|
5766 | } |
---|
5767 | if ((ctxt->myDoc != NULL) && |
---|
5768 | ((terminate) || (ctxt->instate == XML_PARSER_EOF) || |
---|
5769 | (ctxt->instate == XML_PARSER_EPILOG))) { |
---|
5770 | xmlDtdPtr dtd; |
---|
5771 | dtd = ctxt->myDoc->intSubset; |
---|
5772 | if (dtd == NULL) |
---|
5773 | ctxt->myDoc->intSubset = |
---|
5774 | xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "SGML", |
---|
5775 | BAD_CAST "-//W3C//DTD SGML 4.0 Transitional//EN", |
---|
5776 | BAD_CAST "http://www.w3.org/TR/REC-docbook/loose.dtd"); |
---|
5777 | } |
---|
5778 | #ifdef DEBUG_PUSH |
---|
5779 | xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret); |
---|
5780 | #endif |
---|
5781 | return(ret); |
---|
5782 | } |
---|
5783 | |
---|
5784 | /** |
---|
5785 | * docbParseChunk: |
---|
5786 | * @ctxt: an XML parser context |
---|
5787 | * @chunk: an char array |
---|
5788 | * @size: the size in byte of the chunk |
---|
5789 | * @terminate: last chunk indicator |
---|
5790 | * |
---|
5791 | * Parse a Chunk of memory |
---|
5792 | * |
---|
5793 | * Returns zero if no error, the xmlParserErrors otherwise. |
---|
5794 | */ |
---|
5795 | int |
---|
5796 | docbParseChunk(docbParserCtxtPtr ctxt, const char *chunk, int size, |
---|
5797 | int terminate) { |
---|
5798 | if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
---|
5799 | (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
---|
5800 | int base = ctxt->input->base - ctxt->input->buf->buffer->content; |
---|
5801 | int cur = ctxt->input->cur - ctxt->input->base; |
---|
5802 | |
---|
5803 | xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
---|
5804 | ctxt->input->base = ctxt->input->buf->buffer->content + base; |
---|
5805 | ctxt->input->cur = ctxt->input->base + cur; |
---|
5806 | #ifdef DEBUG_PUSH |
---|
5807 | xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
---|
5808 | #endif |
---|
5809 | |
---|
5810 | if ((terminate) || (ctxt->input->buf->buffer->use > 80)) |
---|
5811 | docbParseTryOrFinish(ctxt, terminate); |
---|
5812 | } else if (ctxt->instate != XML_PARSER_EOF) { |
---|
5813 | xmlParserInputBufferPush(ctxt->input->buf, 0, ""); |
---|
5814 | docbParseTryOrFinish(ctxt, terminate); |
---|
5815 | } |
---|
5816 | if (terminate) { |
---|
5817 | if ((ctxt->instate != XML_PARSER_EOF) && |
---|
5818 | (ctxt->instate != XML_PARSER_EPILOG) && |
---|
5819 | (ctxt->instate != XML_PARSER_MISC)) { |
---|
5820 | ctxt->errNo = XML_ERR_DOCUMENT_END; |
---|
5821 | if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
---|
5822 | ctxt->sax->error(ctxt->userData, |
---|
5823 | "Extra content at the end of the document\n"); |
---|
5824 | ctxt->wellFormed = 0; |
---|
5825 | } |
---|
5826 | if (ctxt->instate != XML_PARSER_EOF) { |
---|
5827 | if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
---|
5828 | ctxt->sax->endDocument(ctxt->userData); |
---|
5829 | } |
---|
5830 | ctxt->instate = XML_PARSER_EOF; |
---|
5831 | } |
---|
5832 | return((xmlParserErrors) ctxt->errNo); |
---|
5833 | } |
---|
5834 | |
---|
5835 | /************************************************************************ |
---|
5836 | * * |
---|
5837 | * User entry points * |
---|
5838 | * * |
---|
5839 | ************************************************************************/ |
---|
5840 | |
---|
5841 | /** |
---|
5842 | * docbCreatePushParserCtxt : |
---|
5843 | * @sax: a SAX handler |
---|
5844 | * @user_data: The user data returned on SAX callbacks |
---|
5845 | * @chunk: a pointer to an array of chars |
---|
5846 | * @size: number of chars in the array |
---|
5847 | * @filename: an optional file name or URI |
---|
5848 | * @enc: an optional encoding |
---|
5849 | * |
---|
5850 | * Create a parser context for using the DocBook SGML parser in push mode |
---|
5851 | * To allow content encoding detection, @size should be >= 4 |
---|
5852 | * The value of @filename is used for fetching external entities |
---|
5853 | * and error/warning reports. |
---|
5854 | * |
---|
5855 | * Returns the new parser context or NULL |
---|
5856 | */ |
---|
5857 | docbParserCtxtPtr |
---|
5858 | docbCreatePushParserCtxt(docbSAXHandlerPtr sax, void *user_data, |
---|
5859 | const char *chunk, int size, const char *filename, |
---|
5860 | xmlCharEncoding enc) { |
---|
5861 | docbParserCtxtPtr ctxt; |
---|
5862 | docbParserInputPtr inputStream; |
---|
5863 | xmlParserInputBufferPtr buf; |
---|
5864 | |
---|
5865 | buf = xmlAllocParserInputBuffer(enc); |
---|
5866 | if (buf == NULL) return(NULL); |
---|
5867 | |
---|
5868 | ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt)); |
---|
5869 | if (ctxt == NULL) { |
---|
5870 | xmlFree(buf); |
---|
5871 | return(NULL); |
---|
5872 | } |
---|
5873 | memset(ctxt, 0, sizeof(docbParserCtxt)); |
---|
5874 | docbInitParserCtxt(ctxt); |
---|
5875 | if (sax != NULL) { |
---|
5876 | if (ctxt->sax != &docbDefaultSAXHandler) |
---|
5877 | xmlFree(ctxt->sax); |
---|
5878 | ctxt->sax = (docbSAXHandlerPtr) xmlMalloc(sizeof(docbSAXHandler)); |
---|
5879 | if (ctxt->sax == NULL) { |
---|
5880 | xmlFree(buf); |
---|
5881 | xmlFree(ctxt); |
---|
5882 | return(NULL); |
---|
5883 | } |
---|
5884 | memcpy(ctxt->sax, sax, sizeof(docbSAXHandler)); |
---|
5885 | if (user_data != NULL) |
---|
5886 | ctxt->userData = user_data; |
---|
5887 | } |
---|
5888 | if (filename == NULL) { |
---|
5889 | ctxt->directory = NULL; |
---|
5890 | } else { |
---|
5891 | ctxt->directory = xmlParserGetDirectory(filename); |
---|
5892 | } |
---|
5893 | |
---|
5894 | inputStream = docbNewInputStream(ctxt); |
---|
5895 | if (inputStream == NULL) { |
---|
5896 | xmlFreeParserCtxt(ctxt); |
---|
5897 | return(NULL); |
---|
5898 | } |
---|
5899 | |
---|
5900 | if (filename == NULL) |
---|
5901 | inputStream->filename = NULL; |
---|
5902 | else |
---|
5903 | inputStream->filename = xmlMemStrdup(filename); |
---|
5904 | inputStream->buf = buf; |
---|
5905 | inputStream->base = inputStream->buf->buffer->content; |
---|
5906 | inputStream->cur = inputStream->buf->buffer->content; |
---|
5907 | |
---|
5908 | inputPush(ctxt, inputStream); |
---|
5909 | |
---|
5910 | if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
---|
5911 | (ctxt->input->buf != NULL)) { |
---|
5912 | xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
---|
5913 | #ifdef DEBUG_PUSH |
---|
5914 | xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
---|
5915 | #endif |
---|
5916 | } |
---|
5917 | |
---|
5918 | return(ctxt); |
---|
5919 | } |
---|
5920 | |
---|
5921 | /** |
---|
5922 | * docbSAXParseDoc : |
---|
5923 | * @cur: a pointer to an array of xmlChar |
---|
5924 | * @encoding: a free form C string describing the SGML document encoding, or NULL |
---|
5925 | * @sax: the SAX handler block |
---|
5926 | * @userData: if using SAX, this pointer will be provided on callbacks. |
---|
5927 | * |
---|
5928 | * parse an SGML in-memory document and build a tree. |
---|
5929 | * It use the given SAX function block to handle the parsing callback. |
---|
5930 | * If sax is NULL, fallback to the default DOM tree building routines. |
---|
5931 | * |
---|
5932 | * Returns the resulting document tree |
---|
5933 | */ |
---|
5934 | |
---|
5935 | docbDocPtr |
---|
5936 | docbSAXParseDoc(xmlChar *cur, const char *encoding, docbSAXHandlerPtr sax, void *userData) { |
---|
5937 | docbDocPtr ret; |
---|
5938 | docbParserCtxtPtr ctxt; |
---|
5939 | |
---|
5940 | if (cur == NULL) return(NULL); |
---|
5941 | |
---|
5942 | |
---|
5943 | ctxt = docbCreateDocParserCtxt(cur, encoding); |
---|
5944 | if (ctxt == NULL) return(NULL); |
---|
5945 | if (sax != NULL) { |
---|
5946 | ctxt->sax = sax; |
---|
5947 | ctxt->userData = userData; |
---|
5948 | } |
---|
5949 | |
---|
5950 | docbParseDocument(ctxt); |
---|
5951 | ret = ctxt->myDoc; |
---|
5952 | if (sax != NULL) { |
---|
5953 | ctxt->sax = NULL; |
---|
5954 | ctxt->userData = NULL; |
---|
5955 | } |
---|
5956 | docbFreeParserCtxt(ctxt); |
---|
5957 | |
---|
5958 | return(ret); |
---|
5959 | } |
---|
5960 | |
---|
5961 | /** |
---|
5962 | * docbParseDoc : |
---|
5963 | * @cur: a pointer to an array of xmlChar |
---|
5964 | * @encoding: a free form C string describing the SGML document encoding, or NULL |
---|
5965 | * |
---|
5966 | * parse an SGML in-memory document and build a tree. |
---|
5967 | * |
---|
5968 | * Returns the resulting document tree |
---|
5969 | */ |
---|
5970 | |
---|
5971 | docbDocPtr |
---|
5972 | docbParseDoc(xmlChar *cur, const char *encoding) { |
---|
5973 | return(docbSAXParseDoc(cur, encoding, NULL, NULL)); |
---|
5974 | } |
---|
5975 | |
---|
5976 | |
---|
5977 | /** |
---|
5978 | * docbCreateFileParserCtxt : |
---|
5979 | * @filename: the filename |
---|
5980 | * @encoding: the SGML document encoding, or NULL |
---|
5981 | * |
---|
5982 | * Create a parser context for a file content. |
---|
5983 | * Automatic support for ZLIB/Compress compressed document is provided |
---|
5984 | * by default if found at compile-time. |
---|
5985 | * |
---|
5986 | * Returns the new parser context or NULL |
---|
5987 | */ |
---|
5988 | docbParserCtxtPtr |
---|
5989 | docbCreateFileParserCtxt(const char *filename, |
---|
5990 | const char *encoding ATTRIBUTE_UNUSED) |
---|
5991 | { |
---|
5992 | docbParserCtxtPtr ctxt; |
---|
5993 | docbParserInputPtr inputStream; |
---|
5994 | xmlParserInputBufferPtr buf; |
---|
5995 | /* sgmlCharEncoding enc; */ |
---|
5996 | |
---|
5997 | buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); |
---|
5998 | if (buf == NULL) return(NULL); |
---|
5999 | |
---|
6000 | ctxt = (docbParserCtxtPtr) xmlMalloc(sizeof(docbParserCtxt)); |
---|
6001 | if (ctxt == NULL) { |
---|
6002 | perror("malloc"); |
---|
6003 | return(NULL); |
---|
6004 | } |
---|
6005 | memset(ctxt, 0, sizeof(docbParserCtxt)); |
---|
6006 | docbInitParserCtxt(ctxt); |
---|
6007 | inputStream = (docbParserInputPtr) xmlMalloc(sizeof(docbParserInput)); |
---|
6008 | if (inputStream == NULL) { |
---|
6009 | perror("malloc"); |
---|
6010 | xmlFree(ctxt); |
---|
6011 | return(NULL); |
---|
6012 | } |
---|
6013 | memset(inputStream, 0, sizeof(docbParserInput)); |
---|
6014 | |
---|
6015 | inputStream->filename = xmlMemStrdup(filename); |
---|
6016 | inputStream->line = 1; |
---|
6017 | inputStream->col = 1; |
---|
6018 | inputStream->buf = buf; |
---|
6019 | inputStream->directory = NULL; |
---|
6020 | |
---|
6021 | inputStream->base = inputStream->buf->buffer->content; |
---|
6022 | inputStream->cur = inputStream->buf->buffer->content; |
---|
6023 | inputStream->free = NULL; |
---|
6024 | |
---|
6025 | inputPush(ctxt, inputStream); |
---|
6026 | return(ctxt); |
---|
6027 | } |
---|
6028 | |
---|
6029 | /** |
---|
6030 | * docbSAXParseFile : |
---|
6031 | * @filename: the filename |
---|
6032 | * @encoding: a free form C string describing the SGML document encoding, or NULL |
---|
6033 | * @sax: the SAX handler block |
---|
6034 | * @userData: if using SAX, this pointer will be provided on callbacks. |
---|
6035 | * |
---|
6036 | * parse an SGML file and build a tree. Automatic support for ZLIB/Compress |
---|
6037 | * compressed document is provided by default if found at compile-time. |
---|
6038 | * It use the given SAX function block to handle the parsing callback. |
---|
6039 | * If sax is NULL, fallback to the default DOM tree building routines. |
---|
6040 | * |
---|
6041 | * Returns the resulting document tree |
---|
6042 | */ |
---|
6043 | |
---|
6044 | docbDocPtr |
---|
6045 | docbSAXParseFile(const char *filename, const char *encoding, docbSAXHandlerPtr sax, |
---|
6046 | void *userData) { |
---|
6047 | docbDocPtr ret; |
---|
6048 | docbParserCtxtPtr ctxt; |
---|
6049 | docbSAXHandlerPtr oldsax = NULL; |
---|
6050 | |
---|
6051 | ctxt = docbCreateFileParserCtxt(filename, encoding); |
---|
6052 | if (ctxt == NULL) return(NULL); |
---|
6053 | if (sax != NULL) { |
---|
6054 | oldsax = ctxt->sax; |
---|
6055 | ctxt->sax = sax; |
---|
6056 | ctxt->userData = userData; |
---|
6057 | } |
---|
6058 | |
---|
6059 | docbParseDocument(ctxt); |
---|
6060 | |
---|
6061 | ret = ctxt->myDoc; |
---|
6062 | if (sax != NULL) { |
---|
6063 | ctxt->sax = oldsax; |
---|
6064 | ctxt->userData = NULL; |
---|
6065 | } |
---|
6066 | docbFreeParserCtxt(ctxt); |
---|
6067 | |
---|
6068 | return(ret); |
---|
6069 | } |
---|
6070 | |
---|
6071 | /** |
---|
6072 | * docbParseFile : |
---|
6073 | * @filename: the filename |
---|
6074 | * @encoding: a free form C string describing document encoding, or NULL |
---|
6075 | * |
---|
6076 | * parse a Docbook SGML file and build a tree. Automatic support for |
---|
6077 | * ZLIB/Compress compressed document is provided by default if found |
---|
6078 | * at compile-time. |
---|
6079 | * |
---|
6080 | * Returns the resulting document tree |
---|
6081 | */ |
---|
6082 | |
---|
6083 | docbDocPtr |
---|
6084 | docbParseFile(const char *filename, const char *encoding) { |
---|
6085 | return(docbSAXParseFile(filename, encoding, NULL, NULL)); |
---|
6086 | } |
---|
6087 | |
---|
6088 | #endif /* LIBXML_DOCB_ENABLED */ |
---|