1 | /* gmarkup.c - Simple XML-like parser |
---|
2 | * |
---|
3 | * Copyright 2000, 2003 Red Hat, Inc. |
---|
4 | * |
---|
5 | * GLib is free software; you can redistribute it and/or modify it |
---|
6 | * under the terms of the GNU Lesser General Public License as |
---|
7 | * published by the Free Software Foundation; either version 2 of the |
---|
8 | * License, or (at your option) any later version. |
---|
9 | * |
---|
10 | * GLib is distributed in the hope that it will be useful, |
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | * Lesser General Public License for more details. |
---|
14 | * |
---|
15 | * You should have received a copy of the GNU Lesser General Public |
---|
16 | * License along with GLib; see the file COPYING.LIB. If not, |
---|
17 | * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
---|
18 | * Boston, MA 02111-1307, USA. |
---|
19 | */ |
---|
20 | |
---|
21 | #include "config.h" |
---|
22 | |
---|
23 | #include <stdarg.h> |
---|
24 | #include <string.h> |
---|
25 | #include <stdio.h> |
---|
26 | #include <stdlib.h> |
---|
27 | #include <errno.h> |
---|
28 | |
---|
29 | #include "glib.h" |
---|
30 | |
---|
31 | #include "glibintl.h" |
---|
32 | |
---|
33 | GQuark |
---|
34 | g_markup_error_quark (void) |
---|
35 | { |
---|
36 | static GQuark error_quark = 0; |
---|
37 | |
---|
38 | if (error_quark == 0) |
---|
39 | error_quark = g_quark_from_static_string ("g-markup-error-quark"); |
---|
40 | |
---|
41 | return error_quark; |
---|
42 | } |
---|
43 | |
---|
/* States of the main parser state machine.  The order of the
 * enumerators is significant only in that STATE_START is the first
 * value; the names describe where the parser currently is in the
 * input stream.
 */
typedef enum
{
  STATE_START,                      /* initial state of a new context */
  STATE_AFTER_OPEN_ANGLE,           /* just consumed a '<' */
  STATE_AFTER_CLOSE_ANGLE,          /* just consumed a '>' */
  STATE_AFTER_ELISION_SLASH,        /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,  /* value delimited by single quotes */
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,  /* value delimited by double quotes */
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,         /* inside a "<?" or "<!" construct */
  STATE_ERROR                       /* set by mark_error(); terminal */
} GMarkupParseState;
64 | |
---|
65 | struct _GMarkupParseContext |
---|
66 | { |
---|
67 | const GMarkupParser *parser; |
---|
68 | |
---|
69 | GMarkupParseFlags flags; |
---|
70 | |
---|
71 | gint line_number; |
---|
72 | gint char_number; |
---|
73 | |
---|
74 | gpointer user_data; |
---|
75 | GDestroyNotify dnotify; |
---|
76 | |
---|
77 | /* A piece of character data or an element that |
---|
78 | * hasn't "ended" yet so we haven't yet called |
---|
79 | * the callback for it. |
---|
80 | */ |
---|
81 | GString *partial_chunk; |
---|
82 | |
---|
83 | GMarkupParseState state; |
---|
84 | GSList *tag_stack; |
---|
85 | gchar **attr_names; |
---|
86 | gchar **attr_values; |
---|
87 | gint cur_attr; |
---|
88 | gint alloc_attrs; |
---|
89 | |
---|
90 | const gchar *current_text; |
---|
91 | gssize current_text_len; |
---|
92 | const gchar *current_text_end; |
---|
93 | |
---|
94 | GString *leftover_char_portion; |
---|
95 | |
---|
96 | /* used to save the start of the last interesting thingy */ |
---|
97 | const gchar *start; |
---|
98 | |
---|
99 | const gchar *iter; |
---|
100 | |
---|
101 | guint document_empty : 1; |
---|
102 | guint parsing : 1; |
---|
103 | gint balance; |
---|
104 | }; |
---|
105 | |
---|
106 | /** |
---|
107 | * g_markup_parse_context_new: |
---|
108 | * @parser: a #GMarkupParser |
---|
109 | * @flags: one or more #GMarkupParseFlags |
---|
110 | * @user_data: user data to pass to #GMarkupParser functions |
---|
111 | * @user_data_dnotify: user data destroy notifier called when the parse context is freed |
---|
112 | * |
---|
113 | * Creates a new parse context. A parse context is used to parse |
---|
114 | * marked-up documents. You can feed any number of documents into |
---|
115 | * a context, as long as no errors occur; once an error occurs, |
---|
116 | * the parse context can't continue to parse text (you have to free it |
---|
117 | * and create a new parse context). |
---|
118 | * |
---|
119 | * Return value: a new #GMarkupParseContext |
---|
120 | **/ |
---|
121 | GMarkupParseContext * |
---|
122 | g_markup_parse_context_new (const GMarkupParser *parser, |
---|
123 | GMarkupParseFlags flags, |
---|
124 | gpointer user_data, |
---|
125 | GDestroyNotify user_data_dnotify) |
---|
126 | { |
---|
127 | GMarkupParseContext *context; |
---|
128 | |
---|
129 | g_return_val_if_fail (parser != NULL, NULL); |
---|
130 | |
---|
131 | context = g_new (GMarkupParseContext, 1); |
---|
132 | |
---|
133 | context->parser = parser; |
---|
134 | context->flags = flags; |
---|
135 | context->user_data = user_data; |
---|
136 | context->dnotify = user_data_dnotify; |
---|
137 | |
---|
138 | context->line_number = 1; |
---|
139 | context->char_number = 1; |
---|
140 | |
---|
141 | context->partial_chunk = NULL; |
---|
142 | |
---|
143 | context->state = STATE_START; |
---|
144 | context->tag_stack = NULL; |
---|
145 | context->attr_names = NULL; |
---|
146 | context->attr_values = NULL; |
---|
147 | context->cur_attr = -1; |
---|
148 | context->alloc_attrs = 0; |
---|
149 | |
---|
150 | context->current_text = NULL; |
---|
151 | context->current_text_len = -1; |
---|
152 | context->current_text_end = NULL; |
---|
153 | context->leftover_char_portion = NULL; |
---|
154 | |
---|
155 | context->start = NULL; |
---|
156 | context->iter = NULL; |
---|
157 | |
---|
158 | context->document_empty = TRUE; |
---|
159 | context->parsing = FALSE; |
---|
160 | |
---|
161 | context->balance = 0; |
---|
162 | |
---|
163 | return context; |
---|
164 | } |
---|
165 | |
---|
166 | /** |
---|
167 | * g_markup_parse_context_free: |
---|
168 | * @context: a #GMarkupParseContext |
---|
169 | * |
---|
170 | * Frees a #GMarkupParseContext. Can't be called from inside |
---|
171 | * one of the #GMarkupParser functions. |
---|
172 | * |
---|
173 | **/ |
---|
174 | void |
---|
175 | g_markup_parse_context_free (GMarkupParseContext *context) |
---|
176 | { |
---|
177 | g_return_if_fail (context != NULL); |
---|
178 | g_return_if_fail (!context->parsing); |
---|
179 | |
---|
180 | if (context->dnotify) |
---|
181 | (* context->dnotify) (context->user_data); |
---|
182 | |
---|
183 | g_strfreev (context->attr_names); |
---|
184 | g_strfreev (context->attr_values); |
---|
185 | |
---|
186 | g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL); |
---|
187 | g_slist_free (context->tag_stack); |
---|
188 | |
---|
189 | if (context->partial_chunk) |
---|
190 | g_string_free (context->partial_chunk, TRUE); |
---|
191 | |
---|
192 | if (context->leftover_char_portion) |
---|
193 | g_string_free (context->leftover_char_portion, TRUE); |
---|
194 | |
---|
195 | g_free (context); |
---|
196 | } |
---|
197 | |
---|
198 | static void |
---|
199 | mark_error (GMarkupParseContext *context, |
---|
200 | GError *error) |
---|
201 | { |
---|
202 | context->state = STATE_ERROR; |
---|
203 | |
---|
204 | if (context->parser->error) |
---|
205 | (*context->parser->error) (context, error, context->user_data); |
---|
206 | } |
---|
207 | |
---|
208 | static void |
---|
209 | set_error (GMarkupParseContext *context, |
---|
210 | GError **error, |
---|
211 | GMarkupError code, |
---|
212 | const gchar *format, |
---|
213 | ...) |
---|
214 | { |
---|
215 | GError *tmp_error; |
---|
216 | gchar *s; |
---|
217 | va_list args; |
---|
218 | |
---|
219 | va_start (args, format); |
---|
220 | s = g_strdup_vprintf (format, args); |
---|
221 | va_end (args); |
---|
222 | |
---|
223 | tmp_error = g_error_new (G_MARKUP_ERROR, |
---|
224 | code, |
---|
225 | _("Error on line %d char %d: %s"), |
---|
226 | context->line_number, |
---|
227 | context->char_number, |
---|
228 | s); |
---|
229 | |
---|
230 | g_free (s); |
---|
231 | |
---|
232 | mark_error (context, tmp_error); |
---|
233 | |
---|
234 | g_propagate_error (error, tmp_error); |
---|
235 | } |
---|
236 | |
---|
237 | static gboolean |
---|
238 | is_name_start_char (gunichar c) |
---|
239 | { |
---|
240 | if (g_unichar_isalpha (c) || |
---|
241 | c == '_' || |
---|
242 | c == ':') |
---|
243 | return TRUE; |
---|
244 | else |
---|
245 | return FALSE; |
---|
246 | } |
---|
247 | |
---|
248 | static gboolean |
---|
249 | is_name_char (gunichar c) |
---|
250 | { |
---|
251 | if (g_unichar_isalnum (c) || |
---|
252 | c == '.' || |
---|
253 | c == '-' || |
---|
254 | c == '_' || |
---|
255 | c == ':') |
---|
256 | return TRUE; |
---|
257 | else |
---|
258 | return FALSE; |
---|
259 | } |
---|
260 | |
---|
261 | |
---|
262 | static gchar* |
---|
263 | char_str (gunichar c, |
---|
264 | gchar *buf) |
---|
265 | { |
---|
266 | memset (buf, 0, 7); |
---|
267 | g_unichar_to_utf8 (c, buf); |
---|
268 | return buf; |
---|
269 | } |
---|
270 | |
---|
271 | static gchar* |
---|
272 | utf8_str (const gchar *utf8, |
---|
273 | gchar *buf) |
---|
274 | { |
---|
275 | char_str (g_utf8_get_char (utf8), buf); |
---|
276 | return buf; |
---|
277 | } |
---|
278 | |
---|
279 | static void |
---|
280 | set_unescape_error (GMarkupParseContext *context, |
---|
281 | GError **error, |
---|
282 | const gchar *remaining_text, |
---|
283 | const gchar *remaining_text_end, |
---|
284 | GMarkupError code, |
---|
285 | const gchar *format, |
---|
286 | ...) |
---|
287 | { |
---|
288 | GError *tmp_error; |
---|
289 | gchar *s; |
---|
290 | va_list args; |
---|
291 | gint remaining_newlines; |
---|
292 | const gchar *p; |
---|
293 | |
---|
294 | remaining_newlines = 0; |
---|
295 | p = remaining_text; |
---|
296 | while (p != remaining_text_end) |
---|
297 | { |
---|
298 | if (*p == '\n') |
---|
299 | ++remaining_newlines; |
---|
300 | ++p; |
---|
301 | } |
---|
302 | |
---|
303 | va_start (args, format); |
---|
304 | s = g_strdup_vprintf (format, args); |
---|
305 | va_end (args); |
---|
306 | |
---|
307 | tmp_error = g_error_new (G_MARKUP_ERROR, |
---|
308 | code, |
---|
309 | _("Error on line %d: %s"), |
---|
310 | context->line_number - remaining_newlines, |
---|
311 | s); |
---|
312 | |
---|
313 | g_free (s); |
---|
314 | |
---|
315 | mark_error (context, tmp_error); |
---|
316 | |
---|
317 | g_propagate_error (error, tmp_error); |
---|
318 | } |
---|
319 | |
---|
/* States of the small state machine used by unescape_text(). */
typedef enum
{
  USTATE_INSIDE_TEXT,         /* copying ordinary characters */
  USTATE_AFTER_AMPERSAND,     /* just consumed '&' */
  USTATE_INSIDE_ENTITY_NAME,  /* reading a named entity up to ';' */
  USTATE_AFTER_CHARREF_HASH   /* just consumed "&#" */
} UnescapeState;
327 | |
---|
328 | static gboolean |
---|
329 | unescape_text (GMarkupParseContext *context, |
---|
330 | const gchar *text, |
---|
331 | const gchar *text_end, |
---|
332 | gchar **unescaped, |
---|
333 | GError **error) |
---|
334 | { |
---|
335 | #define MAX_ENT_LEN 5 |
---|
336 | GString *str; |
---|
337 | const gchar *p; |
---|
338 | UnescapeState state; |
---|
339 | const gchar *start; |
---|
340 | gboolean normalize_attribute; |
---|
341 | |
---|
342 | str = g_string_new (NULL); |
---|
343 | |
---|
344 | if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ || |
---|
345 | context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ) |
---|
346 | normalize_attribute = TRUE; |
---|
347 | else |
---|
348 | normalize_attribute = FALSE; |
---|
349 | |
---|
350 | state = USTATE_INSIDE_TEXT; |
---|
351 | p = text; |
---|
352 | start = p; |
---|
353 | while (p != text_end && context->state != STATE_ERROR) |
---|
354 | { |
---|
355 | g_assert (p < text_end); |
---|
356 | |
---|
357 | switch (state) |
---|
358 | { |
---|
359 | case USTATE_INSIDE_TEXT: |
---|
360 | { |
---|
361 | while (p != text_end && *p != '&') |
---|
362 | { |
---|
363 | if ((*p == '\t' || *p == '\n') && normalize_attribute) |
---|
364 | { |
---|
365 | g_string_append_len (str, start, p - start); |
---|
366 | g_string_append_c (str, ' '); |
---|
367 | p = g_utf8_next_char (p); |
---|
368 | start = p; |
---|
369 | } |
---|
370 | else if (*p == '\r') |
---|
371 | { |
---|
372 | g_string_append_len (str, start, p - start); |
---|
373 | g_string_append_c (str, normalize_attribute ? ' ' : '\n'); |
---|
374 | p = g_utf8_next_char (p); |
---|
375 | if (*p == '\n') |
---|
376 | p = g_utf8_next_char (p); |
---|
377 | start = p; |
---|
378 | } |
---|
379 | else |
---|
380 | p = g_utf8_next_char (p); |
---|
381 | } |
---|
382 | |
---|
383 | if (p != start) |
---|
384 | { |
---|
385 | g_string_append_len (str, start, p - start); |
---|
386 | |
---|
387 | start = NULL; |
---|
388 | } |
---|
389 | |
---|
390 | if (p != text_end && *p == '&') |
---|
391 | { |
---|
392 | p = g_utf8_next_char (p); |
---|
393 | state = USTATE_AFTER_AMPERSAND; |
---|
394 | } |
---|
395 | } |
---|
396 | break; |
---|
397 | |
---|
398 | case USTATE_AFTER_AMPERSAND: |
---|
399 | { |
---|
400 | if (*p == '#') |
---|
401 | { |
---|
402 | p = g_utf8_next_char (p); |
---|
403 | |
---|
404 | start = p; |
---|
405 | state = USTATE_AFTER_CHARREF_HASH; |
---|
406 | } |
---|
407 | else if (!is_name_start_char (g_utf8_get_char (p))) |
---|
408 | { |
---|
409 | if (*p == ';') |
---|
410 | { |
---|
411 | set_unescape_error (context, error, |
---|
412 | p, text_end, |
---|
413 | G_MARKUP_ERROR_PARSE, |
---|
414 | _("Empty entity '&;' seen; valid " |
---|
415 | "entities are: & " < > '")); |
---|
416 | } |
---|
417 | else |
---|
418 | { |
---|
419 | gchar buf[7]; |
---|
420 | |
---|
421 | set_unescape_error (context, error, |
---|
422 | p, text_end, |
---|
423 | G_MARKUP_ERROR_PARSE, |
---|
424 | _("Character '%s' is not valid at " |
---|
425 | "the start of an entity name; " |
---|
426 | "the & character begins an entity; " |
---|
427 | "if this ampersand isn't supposed " |
---|
428 | "to be an entity, escape it as " |
---|
429 | "&"), |
---|
430 | utf8_str (p, buf)); |
---|
431 | } |
---|
432 | } |
---|
433 | else |
---|
434 | { |
---|
435 | start = p; |
---|
436 | state = USTATE_INSIDE_ENTITY_NAME; |
---|
437 | } |
---|
438 | } |
---|
439 | break; |
---|
440 | |
---|
441 | |
---|
442 | case USTATE_INSIDE_ENTITY_NAME: |
---|
443 | { |
---|
444 | gchar buf[MAX_ENT_LEN+1] = { |
---|
445 | '\0', '\0', '\0', '\0', '\0', '\0' |
---|
446 | }; |
---|
447 | gchar *dest; |
---|
448 | |
---|
449 | while (p != text_end) |
---|
450 | { |
---|
451 | if (*p == ';') |
---|
452 | break; |
---|
453 | else if (!is_name_char (*p)) |
---|
454 | { |
---|
455 | gchar ubuf[7]; |
---|
456 | |
---|
457 | set_unescape_error (context, error, |
---|
458 | p, text_end, |
---|
459 | G_MARKUP_ERROR_PARSE, |
---|
460 | _("Character '%s' is not valid " |
---|
461 | "inside an entity name"), |
---|
462 | utf8_str (p, ubuf)); |
---|
463 | break; |
---|
464 | } |
---|
465 | |
---|
466 | p = g_utf8_next_char (p); |
---|
467 | } |
---|
468 | |
---|
469 | if (context->state != STATE_ERROR) |
---|
470 | { |
---|
471 | if (p != text_end) |
---|
472 | { |
---|
473 | const gchar *src; |
---|
474 | |
---|
475 | src = start; |
---|
476 | dest = buf; |
---|
477 | while (src != p) |
---|
478 | { |
---|
479 | *dest = *src; |
---|
480 | ++dest; |
---|
481 | ++src; |
---|
482 | } |
---|
483 | |
---|
484 | /* move to after semicolon */ |
---|
485 | p = g_utf8_next_char (p); |
---|
486 | start = p; |
---|
487 | state = USTATE_INSIDE_TEXT; |
---|
488 | |
---|
489 | if (strcmp (buf, "lt") == 0) |
---|
490 | g_string_append_c (str, '<'); |
---|
491 | else if (strcmp (buf, "gt") == 0) |
---|
492 | g_string_append_c (str, '>'); |
---|
493 | else if (strcmp (buf, "amp") == 0) |
---|
494 | g_string_append_c (str, '&'); |
---|
495 | else if (strcmp (buf, "quot") == 0) |
---|
496 | g_string_append_c (str, '"'); |
---|
497 | else if (strcmp (buf, "apos") == 0) |
---|
498 | g_string_append_c (str, '\''); |
---|
499 | else |
---|
500 | { |
---|
501 | set_unescape_error (context, error, |
---|
502 | p, text_end, |
---|
503 | G_MARKUP_ERROR_PARSE, |
---|
504 | _("Entity name '%s' is not known"), |
---|
505 | buf); |
---|
506 | } |
---|
507 | } |
---|
508 | else |
---|
509 | { |
---|
510 | set_unescape_error (context, error, |
---|
511 | /* give line number of the & */ |
---|
512 | start, text_end, |
---|
513 | G_MARKUP_ERROR_PARSE, |
---|
514 | _("Entity did not end with a semicolon; " |
---|
515 | "most likely you used an ampersand " |
---|
516 | "character without intending to start " |
---|
517 | "an entity - escape ampersand as &")); |
---|
518 | } |
---|
519 | } |
---|
520 | } |
---|
521 | break; |
---|
522 | |
---|
523 | case USTATE_AFTER_CHARREF_HASH: |
---|
524 | { |
---|
525 | gboolean is_hex = FALSE; |
---|
526 | if (*p == 'x') |
---|
527 | { |
---|
528 | is_hex = TRUE; |
---|
529 | p = g_utf8_next_char (p); |
---|
530 | start = p; |
---|
531 | } |
---|
532 | |
---|
533 | while (p != text_end && *p != ';') |
---|
534 | p = g_utf8_next_char (p); |
---|
535 | |
---|
536 | if (p != text_end) |
---|
537 | { |
---|
538 | g_assert (*p == ';'); |
---|
539 | |
---|
540 | /* digit is between start and p */ |
---|
541 | |
---|
542 | if (start != p) |
---|
543 | { |
---|
544 | gchar *digit = g_strndup (start, p - start); |
---|
545 | gulong l; |
---|
546 | gchar *end = NULL; |
---|
547 | gchar *digit_end = digit + (p - start); |
---|
548 | |
---|
549 | errno = 0; |
---|
550 | if (is_hex) |
---|
551 | l = strtoul (digit, &end, 16); |
---|
552 | else |
---|
553 | l = strtoul (digit, &end, 10); |
---|
554 | |
---|
555 | if (end != digit_end || errno != 0) |
---|
556 | { |
---|
557 | set_unescape_error (context, error, |
---|
558 | start, text_end, |
---|
559 | G_MARKUP_ERROR_PARSE, |
---|
560 | _("Failed to parse '%s', which " |
---|
561 | "should have been a digit " |
---|
562 | "inside a character reference " |
---|
563 | "(ê for example) - perhaps " |
---|
564 | "the digit is too large"), |
---|
565 | digit); |
---|
566 | } |
---|
567 | else |
---|
568 | { |
---|
569 | /* characters XML permits */ |
---|
570 | if (l == 0x9 || |
---|
571 | l == 0xA || |
---|
572 | l == 0xD || |
---|
573 | (l >= 0x20 && l <= 0xD7FF) || |
---|
574 | (l >= 0xE000 && l <= 0xFFFD) || |
---|
575 | (l >= 0x10000 && l <= 0x10FFFF)) |
---|
576 | { |
---|
577 | gchar buf[7]; |
---|
578 | g_string_append (str, char_str (l, buf)); |
---|
579 | } |
---|
580 | else |
---|
581 | { |
---|
582 | set_unescape_error (context, error, |
---|
583 | start, text_end, |
---|
584 | G_MARKUP_ERROR_PARSE, |
---|
585 | _("Character reference '%s' does not encode a permitted character"), |
---|
586 | digit); |
---|
587 | } |
---|
588 | } |
---|
589 | |
---|
590 | g_free (digit); |
---|
591 | |
---|
592 | /* Move to next state */ |
---|
593 | p = g_utf8_next_char (p); /* past semicolon */ |
---|
594 | start = p; |
---|
595 | state = USTATE_INSIDE_TEXT; |
---|
596 | } |
---|
597 | else |
---|
598 | { |
---|
599 | set_unescape_error (context, error, |
---|
600 | start, text_end, |
---|
601 | G_MARKUP_ERROR_PARSE, |
---|
602 | _("Empty character reference; " |
---|
603 | "should include a digit such as " |
---|
604 | "dž")); |
---|
605 | } |
---|
606 | } |
---|
607 | else |
---|
608 | { |
---|
609 | set_unescape_error (context, error, |
---|
610 | start, text_end, |
---|
611 | G_MARKUP_ERROR_PARSE, |
---|
612 | _("Character reference did not end with a " |
---|
613 | "semicolon; " |
---|
614 | "most likely you used an ampersand " |
---|
615 | "character without intending to start " |
---|
616 | "an entity - escape ampersand as &")); |
---|
617 | } |
---|
618 | } |
---|
619 | break; |
---|
620 | |
---|
621 | default: |
---|
622 | g_assert_not_reached (); |
---|
623 | break; |
---|
624 | } |
---|
625 | } |
---|
626 | |
---|
627 | if (context->state != STATE_ERROR) |
---|
628 | { |
---|
629 | switch (state) |
---|
630 | { |
---|
631 | case USTATE_INSIDE_TEXT: |
---|
632 | break; |
---|
633 | case USTATE_AFTER_AMPERSAND: |
---|
634 | case USTATE_INSIDE_ENTITY_NAME: |
---|
635 | set_unescape_error (context, error, |
---|
636 | NULL, NULL, |
---|
637 | G_MARKUP_ERROR_PARSE, |
---|
638 | _("Unfinished entity reference")); |
---|
639 | break; |
---|
640 | case USTATE_AFTER_CHARREF_HASH: |
---|
641 | set_unescape_error (context, error, |
---|
642 | NULL, NULL, |
---|
643 | G_MARKUP_ERROR_PARSE, |
---|
644 | _("Unfinished character reference")); |
---|
645 | break; |
---|
646 | } |
---|
647 | } |
---|
648 | |
---|
649 | if (context->state == STATE_ERROR) |
---|
650 | { |
---|
651 | g_string_free (str, TRUE); |
---|
652 | *unescaped = NULL; |
---|
653 | return FALSE; |
---|
654 | } |
---|
655 | else |
---|
656 | { |
---|
657 | *unescaped = g_string_free (str, FALSE); |
---|
658 | return TRUE; |
---|
659 | } |
---|
660 | |
---|
661 | #undef MAX_ENT_LEN |
---|
662 | } |
---|
663 | |
---|
664 | static gboolean |
---|
665 | advance_char (GMarkupParseContext *context) |
---|
666 | { |
---|
667 | g_return_val_if_fail (context->iter != context->current_text_end, FALSE); |
---|
668 | |
---|
669 | context->iter = g_utf8_next_char (context->iter); |
---|
670 | context->char_number += 1; |
---|
671 | |
---|
672 | if (context->iter == context->current_text_end) |
---|
673 | return FALSE; |
---|
674 | |
---|
675 | if (*context->iter == '\n') |
---|
676 | { |
---|
677 | context->line_number += 1; |
---|
678 | context->char_number = 1; |
---|
679 | } |
---|
680 | |
---|
681 | return TRUE; |
---|
682 | } |
---|
683 | |
---|
684 | static gboolean |
---|
685 | xml_isspace (char c) |
---|
686 | { |
---|
687 | return c == ' ' || c == '\t' || c == '\n' || c == '\r'; |
---|
688 | } |
---|
689 | |
---|
690 | static void |
---|
691 | skip_spaces (GMarkupParseContext *context) |
---|
692 | { |
---|
693 | do |
---|
694 | { |
---|
695 | if (!xml_isspace (*context->iter)) |
---|
696 | return; |
---|
697 | } |
---|
698 | while (advance_char (context)); |
---|
699 | } |
---|
700 | |
---|
701 | static void |
---|
702 | advance_to_name_end (GMarkupParseContext *context) |
---|
703 | { |
---|
704 | do |
---|
705 | { |
---|
706 | if (!is_name_char (g_utf8_get_char (context->iter))) |
---|
707 | return; |
---|
708 | } |
---|
709 | while (advance_char (context)); |
---|
710 | } |
---|
711 | |
---|
712 | static void |
---|
713 | add_to_partial (GMarkupParseContext *context, |
---|
714 | const gchar *text_start, |
---|
715 | const gchar *text_end) |
---|
716 | { |
---|
717 | if (context->partial_chunk == NULL) |
---|
718 | context->partial_chunk = g_string_new (NULL); |
---|
719 | |
---|
720 | if (text_start != text_end) |
---|
721 | g_string_append_len (context->partial_chunk, text_start, |
---|
722 | text_end - text_start); |
---|
723 | |
---|
724 | /* Invariant here that partial_chunk exists */ |
---|
725 | } |
---|
726 | |
---|
727 | static void |
---|
728 | truncate_partial (GMarkupParseContext *context) |
---|
729 | { |
---|
730 | if (context->partial_chunk != NULL) |
---|
731 | { |
---|
732 | context->partial_chunk = g_string_truncate (context->partial_chunk, 0); |
---|
733 | } |
---|
734 | } |
---|
735 | |
---|
736 | static const gchar* |
---|
737 | current_element (GMarkupParseContext *context) |
---|
738 | { |
---|
739 | return context->tag_stack->data; |
---|
740 | } |
---|
741 | |
---|
742 | static const gchar* |
---|
743 | current_attribute (GMarkupParseContext *context) |
---|
744 | { |
---|
745 | g_assert (context->cur_attr >= 0); |
---|
746 | return context->attr_names[context->cur_attr]; |
---|
747 | } |
---|
748 | |
---|
749 | static void |
---|
750 | find_current_text_end (GMarkupParseContext *context) |
---|
751 | { |
---|
752 | /* This function must be safe (non-segfaulting) on invalid UTF8 */ |
---|
753 | const gchar *end = context->current_text + context->current_text_len; |
---|
754 | const gchar *p; |
---|
755 | const gchar *next; |
---|
756 | |
---|
757 | g_assert (context->current_text_len > 0); |
---|
758 | |
---|
759 | p = context->current_text; |
---|
760 | next = g_utf8_find_next_char (p, end); |
---|
761 | |
---|
762 | while (next && *next) |
---|
763 | { |
---|
764 | if (p == next) |
---|
765 | next++; |
---|
766 | p = next; |
---|
767 | next = g_utf8_find_next_char (p, end); |
---|
768 | } |
---|
769 | |
---|
770 | /* p is now the start of the last character or character portion. */ |
---|
771 | g_assert (p != end); |
---|
772 | next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */ |
---|
773 | |
---|
774 | if (next == end) |
---|
775 | { |
---|
776 | /* whole character */ |
---|
777 | context->current_text_end = end; |
---|
778 | } |
---|
779 | else |
---|
780 | { |
---|
781 | /* portion */ |
---|
782 | context->leftover_char_portion = g_string_new_len (p, end - p); |
---|
783 | context->current_text_len -= (end - p); |
---|
784 | context->current_text_end = p; |
---|
785 | } |
---|
786 | } |
---|
787 | |
---|
788 | |
---|
789 | static void |
---|
790 | add_attribute (GMarkupParseContext *context, char *name) |
---|
791 | { |
---|
792 | if (context->cur_attr + 2 >= context->alloc_attrs) |
---|
793 | { |
---|
794 | context->alloc_attrs += 5; /* silly magic number */ |
---|
795 | context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs); |
---|
796 | context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs); |
---|
797 | } |
---|
798 | context->cur_attr++; |
---|
799 | context->attr_names[context->cur_attr] = name; |
---|
800 | context->attr_values[context->cur_attr] = NULL; |
---|
801 | context->attr_names[context->cur_attr+1] = NULL; |
---|
802 | context->attr_values[context->cur_attr+1] = NULL; |
---|
803 | } |
---|
804 | |
---|
805 | /** |
---|
806 | * g_markup_parse_context_parse: |
---|
807 | * @context: a #GMarkupParseContext |
---|
808 | * @text: chunk of text to parse |
---|
809 | * @text_len: length of @text in bytes |
---|
810 | * @error: return location for a #GError |
---|
811 | * |
---|
812 | * Feed some data to the #GMarkupParseContext. The data need not |
---|
813 | * be valid UTF-8; an error will be signaled if it's invalid. |
---|
814 | * The data need not be an entire document; you can feed a document |
---|
815 | * into the parser incrementally, via multiple calls to this function. |
---|
816 | * Typically, as you receive data from a network connection or file, |
---|
817 | * you feed each received chunk of data into this function, aborting |
---|
818 | * the process if an error occurs. Once an error is reported, no further |
---|
819 | * data may be fed to the #GMarkupParseContext; all errors are fatal. |
---|
820 | * |
---|
821 | * Return value: %FALSE if an error occurred, %TRUE on success |
---|
822 | **/ |
---|
823 | gboolean |
---|
824 | g_markup_parse_context_parse (GMarkupParseContext *context, |
---|
825 | const gchar *text, |
---|
826 | gssize text_len, |
---|
827 | GError **error) |
---|
828 | { |
---|
829 | const gchar *first_invalid; |
---|
830 | |
---|
831 | g_return_val_if_fail (context != NULL, FALSE); |
---|
832 | g_return_val_if_fail (text != NULL, FALSE); |
---|
833 | g_return_val_if_fail (context->state != STATE_ERROR, FALSE); |
---|
834 | g_return_val_if_fail (!context->parsing, FALSE); |
---|
835 | |
---|
836 | if (text_len < 0) |
---|
837 | text_len = strlen (text); |
---|
838 | |
---|
839 | if (text_len == 0) |
---|
840 | return TRUE; |
---|
841 | |
---|
842 | context->parsing = TRUE; |
---|
843 | |
---|
844 | if (context->leftover_char_portion) |
---|
845 | { |
---|
846 | const gchar *first_char; |
---|
847 | |
---|
848 | if ((*text & 0xc0) != 0x80) |
---|
849 | first_char = text; |
---|
850 | else |
---|
851 | first_char = g_utf8_find_next_char (text, text + text_len); |
---|
852 | |
---|
853 | if (first_char) |
---|
854 | { |
---|
855 | /* leftover_char_portion was completed. Parse it. */ |
---|
856 | GString *portion = context->leftover_char_portion; |
---|
857 | |
---|
858 | g_string_append_len (context->leftover_char_portion, |
---|
859 | text, first_char - text); |
---|
860 | |
---|
861 | /* hacks to allow recursion */ |
---|
862 | context->parsing = FALSE; |
---|
863 | context->leftover_char_portion = NULL; |
---|
864 | |
---|
865 | if (!g_markup_parse_context_parse (context, |
---|
866 | portion->str, portion->len, |
---|
867 | error)) |
---|
868 | { |
---|
869 | g_assert (context->state == STATE_ERROR); |
---|
870 | } |
---|
871 | |
---|
872 | g_string_free (portion, TRUE); |
---|
873 | context->parsing = TRUE; |
---|
874 | |
---|
875 | /* Skip the fraction of char that was in this text */ |
---|
876 | text_len -= (first_char - text); |
---|
877 | text = first_char; |
---|
878 | } |
---|
879 | else |
---|
880 | { |
---|
881 | /* another little chunk of the leftover char; geez |
---|
882 | * someone is inefficient. |
---|
883 | */ |
---|
884 | g_string_append_len (context->leftover_char_portion, |
---|
885 | text, text_len); |
---|
886 | |
---|
887 | if (context->leftover_char_portion->len > 7) |
---|
888 | { |
---|
889 | /* The leftover char portion is too big to be |
---|
890 | * a UTF-8 character |
---|
891 | */ |
---|
892 | set_error (context, |
---|
893 | error, |
---|
894 | G_MARKUP_ERROR_BAD_UTF8, |
---|
895 | _("Invalid UTF-8 encoded text")); |
---|
896 | } |
---|
897 | |
---|
898 | goto finished; |
---|
899 | } |
---|
900 | } |
---|
901 | |
---|
902 | context->current_text = text; |
---|
903 | context->current_text_len = text_len; |
---|
904 | context->iter = context->current_text; |
---|
905 | context->start = context->iter; |
---|
906 | |
---|
907 | /* Nothing left after finishing the leftover char, or nothing |
---|
908 | * passed in to begin with. |
---|
909 | */ |
---|
910 | if (context->current_text_len == 0) |
---|
911 | goto finished; |
---|
912 | |
---|
913 | /* find_current_text_end () assumes the string starts at |
---|
914 | * a character start, so we need to validate at least |
---|
915 | * that much. It doesn't assume any following bytes |
---|
916 | * are valid. |
---|
917 | */ |
---|
918 | if ((*context->current_text & 0xc0) == 0x80) /* not a char start */ |
---|
919 | { |
---|
920 | set_error (context, |
---|
921 | error, |
---|
922 | G_MARKUP_ERROR_BAD_UTF8, |
---|
923 | _("Invalid UTF-8 encoded text")); |
---|
924 | goto finished; |
---|
925 | } |
---|
926 | |
---|
927 | /* Initialize context->current_text_end, possibly adjusting |
---|
928 | * current_text_len, and add any leftover char portion |
---|
929 | */ |
---|
930 | find_current_text_end (context); |
---|
931 | |
---|
932 | /* Validate UTF8 (must be done after we find the end, since |
---|
933 | * we could have a trailing incomplete char) |
---|
934 | */ |
---|
935 | if (!g_utf8_validate (context->current_text, |
---|
936 | context->current_text_len, |
---|
937 | &first_invalid)) |
---|
938 | { |
---|
939 | gint newlines = 0; |
---|
940 | const gchar *p; |
---|
941 | p = context->current_text; |
---|
942 | while (p != context->current_text_end) |
---|
943 | { |
---|
944 | if (*p == '\n') |
---|
945 | ++newlines; |
---|
946 | ++p; |
---|
947 | } |
---|
948 | |
---|
949 | context->line_number += newlines; |
---|
950 | |
---|
951 | set_error (context, |
---|
952 | error, |
---|
953 | G_MARKUP_ERROR_BAD_UTF8, |
---|
954 | _("Invalid UTF-8 encoded text")); |
---|
955 | goto finished; |
---|
956 | } |
---|
957 | |
---|
958 | while (context->iter != context->current_text_end) |
---|
959 | { |
---|
960 | switch (context->state) |
---|
961 | { |
---|
962 | case STATE_START: |
---|
963 | /* Possible next state: AFTER_OPEN_ANGLE */ |
---|
964 | |
---|
965 | g_assert (context->tag_stack == NULL); |
---|
966 | |
---|
967 | /* whitespace is ignored outside of any elements */ |
---|
968 | skip_spaces (context); |
---|
969 | |
---|
970 | if (context->iter != context->current_text_end) |
---|
971 | { |
---|
972 | if (*context->iter == '<') |
---|
973 | { |
---|
974 | /* Move after the open angle */ |
---|
975 | advance_char (context); |
---|
976 | |
---|
977 | context->state = STATE_AFTER_OPEN_ANGLE; |
---|
978 | |
---|
979 | /* this could start a passthrough */ |
---|
980 | context->start = context->iter; |
---|
981 | |
---|
982 | /* document is now non-empty */ |
---|
983 | context->document_empty = FALSE; |
---|
984 | } |
---|
985 | else |
---|
986 | { |
---|
987 | set_error (context, |
---|
988 | error, |
---|
989 | G_MARKUP_ERROR_PARSE, |
---|
990 | _("Document must begin with an element (e.g. <book>)")); |
---|
991 | } |
---|
992 | } |
---|
993 | break; |
---|
994 | |
---|
995 | case STATE_AFTER_OPEN_ANGLE: |
---|
996 | /* Possible next states: INSIDE_OPEN_TAG_NAME, |
---|
997 | * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH |
---|
998 | */ |
---|
999 | if (*context->iter == '?' || |
---|
1000 | *context->iter == '!') |
---|
1001 | { |
---|
1002 | /* include < in the passthrough */ |
---|
1003 | const gchar *openangle = "<"; |
---|
1004 | add_to_partial (context, openangle, openangle + 1); |
---|
1005 | context->start = context->iter; |
---|
1006 | context->balance = 1; |
---|
1007 | context->state = STATE_INSIDE_PASSTHROUGH; |
---|
1008 | } |
---|
1009 | else if (*context->iter == '/') |
---|
1010 | { |
---|
1011 | /* move after it */ |
---|
1012 | advance_char (context); |
---|
1013 | |
---|
1014 | context->state = STATE_AFTER_CLOSE_TAG_SLASH; |
---|
1015 | } |
---|
1016 | else if (is_name_start_char (g_utf8_get_char (context->iter))) |
---|
1017 | { |
---|
1018 | context->state = STATE_INSIDE_OPEN_TAG_NAME; |
---|
1019 | |
---|
1020 | /* start of tag name */ |
---|
1021 | context->start = context->iter; |
---|
1022 | } |
---|
1023 | else |
---|
1024 | { |
---|
1025 | gchar buf[7]; |
---|
1026 | set_error (context, |
---|
1027 | error, |
---|
1028 | G_MARKUP_ERROR_PARSE, |
---|
1029 | _("'%s' is not a valid character following " |
---|
1030 | "a '<' character; it may not begin an " |
---|
1031 | "element name"), |
---|
1032 | utf8_str (context->iter, buf)); |
---|
1033 | } |
---|
1034 | break; |
---|
1035 | |
---|
1036 | /* The AFTER_CLOSE_ANGLE state is actually sort of |
---|
1037 | * broken, because it doesn't correspond to a range |
---|
1038 | * of characters in the input stream as the others do, |
---|
1039 | * and thus makes things harder to conceptualize |
---|
1040 | */ |
---|
1041 | case STATE_AFTER_CLOSE_ANGLE: |
---|
1042 | /* Possible next states: INSIDE_TEXT, STATE_START */ |
---|
1043 | if (context->tag_stack == NULL) |
---|
1044 | { |
---|
1045 | context->start = NULL; |
---|
1046 | context->state = STATE_START; |
---|
1047 | } |
---|
1048 | else |
---|
1049 | { |
---|
1050 | context->start = context->iter; |
---|
1051 | context->state = STATE_INSIDE_TEXT; |
---|
1052 | } |
---|
1053 | break; |
---|
1054 | |
---|
1055 | case STATE_AFTER_ELISION_SLASH: |
---|
1056 | /* Possible next state: AFTER_CLOSE_ANGLE */ |
---|
1057 | |
---|
1058 | { |
---|
1059 | /* We need to pop the tag stack and call the end_element |
---|
1060 | * function, since this is the close tag |
---|
1061 | */ |
---|
1062 | GError *tmp_error = NULL; |
---|
1063 | |
---|
1064 | g_assert (context->tag_stack != NULL); |
---|
1065 | |
---|
1066 | tmp_error = NULL; |
---|
1067 | if (context->parser->end_element) |
---|
1068 | (* context->parser->end_element) (context, |
---|
1069 | context->tag_stack->data, |
---|
1070 | context->user_data, |
---|
1071 | &tmp_error); |
---|
1072 | |
---|
1073 | if (tmp_error) |
---|
1074 | { |
---|
1075 | mark_error (context, tmp_error); |
---|
1076 | g_propagate_error (error, tmp_error); |
---|
1077 | } |
---|
1078 | else |
---|
1079 | { |
---|
1080 | if (*context->iter == '>') |
---|
1081 | { |
---|
1082 | /* move after the close angle */ |
---|
1083 | advance_char (context); |
---|
1084 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1085 | } |
---|
1086 | else |
---|
1087 | { |
---|
1088 | gchar buf[7]; |
---|
1089 | set_error (context, |
---|
1090 | error, |
---|
1091 | G_MARKUP_ERROR_PARSE, |
---|
1092 | _("Odd character '%s', expected a '>' character " |
---|
1093 | "to end the start tag of element '%s'"), |
---|
1094 | utf8_str (context->iter, buf), |
---|
1095 | current_element (context)); |
---|
1096 | } |
---|
1097 | } |
---|
1098 | |
---|
1099 | g_free (context->tag_stack->data); |
---|
1100 | context->tag_stack = g_slist_delete_link (context->tag_stack, |
---|
1101 | context->tag_stack); |
---|
1102 | } |
---|
1103 | break; |
---|
1104 | |
---|
1105 | case STATE_INSIDE_OPEN_TAG_NAME: |
---|
1106 | /* Possible next states: BETWEEN_ATTRIBUTES */ |
---|
1107 | |
---|
1108 | /* if there's a partial chunk then it's the first part of the |
---|
1109 | * tag name. If there's a context->start then it's the start |
---|
1110 | * of the tag name in current_text, the partial chunk goes |
---|
1111 | * before that start though. |
---|
1112 | */ |
---|
1113 | advance_to_name_end (context); |
---|
1114 | |
---|
1115 | if (context->iter == context->current_text_end) |
---|
1116 | { |
---|
1117 | /* The name hasn't necessarily ended. Merge with |
---|
1118 | * partial chunk, leave state unchanged. |
---|
1119 | */ |
---|
1120 | add_to_partial (context, context->start, context->iter); |
---|
1121 | } |
---|
1122 | else |
---|
1123 | { |
---|
1124 | /* The name has ended. Combine it with the partial chunk |
---|
1125 | * if any; push it on the stack; enter next state. |
---|
1126 | */ |
---|
1127 | add_to_partial (context, context->start, context->iter); |
---|
1128 | context->tag_stack = |
---|
1129 | g_slist_prepend (context->tag_stack, |
---|
1130 | g_string_free (context->partial_chunk, |
---|
1131 | FALSE)); |
---|
1132 | |
---|
1133 | context->partial_chunk = NULL; |
---|
1134 | |
---|
1135 | context->state = STATE_BETWEEN_ATTRIBUTES; |
---|
1136 | context->start = NULL; |
---|
1137 | } |
---|
1138 | break; |
---|
1139 | |
---|
1140 | case STATE_INSIDE_ATTRIBUTE_NAME: |
---|
1141 | /* Possible next states: AFTER_ATTRIBUTE_NAME */ |
---|
1142 | |
---|
1143 | advance_to_name_end (context); |
---|
1144 | add_to_partial (context, context->start, context->iter); |
---|
1145 | |
---|
1146 | /* read the full name, if we enter the equals sign state |
---|
1147 | * then add the attribute to the list (without the value), |
---|
1148 | * otherwise store a partial chunk to be prepended later. |
---|
1149 | */ |
---|
1150 | if (context->iter != context->current_text_end) |
---|
1151 | context->state = STATE_AFTER_ATTRIBUTE_NAME; |
---|
1152 | break; |
---|
1153 | |
---|
1154 | case STATE_AFTER_ATTRIBUTE_NAME: |
---|
1155 | /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */ |
---|
1156 | |
---|
1157 | skip_spaces (context); |
---|
1158 | |
---|
1159 | if (context->iter != context->current_text_end) |
---|
1160 | { |
---|
1161 | /* The name has ended. Combine it with the partial chunk |
---|
1162 | * if any; push it on the stack; enter next state. |
---|
1163 | */ |
---|
1164 | add_attribute (context, g_string_free (context->partial_chunk, FALSE)); |
---|
1165 | |
---|
1166 | context->partial_chunk = NULL; |
---|
1167 | context->start = NULL; |
---|
1168 | |
---|
1169 | if (*context->iter == '=') |
---|
1170 | { |
---|
1171 | advance_char (context); |
---|
1172 | context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN; |
---|
1173 | } |
---|
1174 | else |
---|
1175 | { |
---|
1176 | gchar buf[7]; |
---|
1177 | set_error (context, |
---|
1178 | error, |
---|
1179 | G_MARKUP_ERROR_PARSE, |
---|
1180 | _("Odd character '%s', expected a '=' after " |
---|
1181 | "attribute name '%s' of element '%s'"), |
---|
1182 | utf8_str (context->iter, buf), |
---|
1183 | current_attribute (context), |
---|
1184 | current_element (context)); |
---|
1185 | |
---|
1186 | } |
---|
1187 | } |
---|
1188 | break; |
---|
1189 | |
---|
1190 | case STATE_BETWEEN_ATTRIBUTES: |
---|
1191 | /* Possible next states: AFTER_CLOSE_ANGLE, |
---|
1192 | * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME |
---|
1193 | */ |
---|
1194 | skip_spaces (context); |
---|
1195 | |
---|
1196 | if (context->iter != context->current_text_end) |
---|
1197 | { |
---|
1198 | if (*context->iter == '/') |
---|
1199 | { |
---|
1200 | advance_char (context); |
---|
1201 | context->state = STATE_AFTER_ELISION_SLASH; |
---|
1202 | } |
---|
1203 | else if (*context->iter == '>') |
---|
1204 | { |
---|
1205 | |
---|
1206 | advance_char (context); |
---|
1207 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1208 | } |
---|
1209 | else if (is_name_start_char (g_utf8_get_char (context->iter))) |
---|
1210 | { |
---|
1211 | context->state = STATE_INSIDE_ATTRIBUTE_NAME; |
---|
1212 | /* start of attribute name */ |
---|
1213 | context->start = context->iter; |
---|
1214 | } |
---|
1215 | else |
---|
1216 | { |
---|
1217 | gchar buf[7]; |
---|
1218 | set_error (context, |
---|
1219 | error, |
---|
1220 | G_MARKUP_ERROR_PARSE, |
---|
1221 | _("Odd character '%s', expected a '>' or '/' " |
---|
1222 | "character to end the start tag of " |
---|
1223 | "element '%s', or optionally an attribute; " |
---|
1224 | "perhaps you used an invalid character in " |
---|
1225 | "an attribute name"), |
---|
1226 | utf8_str (context->iter, buf), |
---|
1227 | current_element (context)); |
---|
1228 | } |
---|
1229 | |
---|
1230 | /* If we're done with attributes, invoke |
---|
1231 | * the start_element callback |
---|
1232 | */ |
---|
1233 | if (context->state == STATE_AFTER_ELISION_SLASH || |
---|
1234 | context->state == STATE_AFTER_CLOSE_ANGLE) |
---|
1235 | { |
---|
1236 | const gchar *start_name; |
---|
1237 | /* Ugly, but the current code expects an empty array instead of NULL */ |
---|
1238 | const gchar *empty = NULL; |
---|
1239 | const gchar **attr_names = ∅ |
---|
1240 | const gchar **attr_values = ∅ |
---|
1241 | GError *tmp_error; |
---|
1242 | |
---|
1243 | /* Call user callback for element start */ |
---|
1244 | start_name = current_element (context); |
---|
1245 | |
---|
1246 | if (context->cur_attr >= 0) |
---|
1247 | { |
---|
1248 | attr_names = (const gchar**)context->attr_names; |
---|
1249 | attr_values = (const gchar**)context->attr_values; |
---|
1250 | } |
---|
1251 | |
---|
1252 | tmp_error = NULL; |
---|
1253 | if (context->parser->start_element) |
---|
1254 | (* context->parser->start_element) (context, |
---|
1255 | start_name, |
---|
1256 | (const gchar **)attr_names, |
---|
1257 | (const gchar **)attr_values, |
---|
1258 | context->user_data, |
---|
1259 | &tmp_error); |
---|
1260 | |
---|
1261 | /* Go ahead and free the attributes. */ |
---|
1262 | for (; context->cur_attr >= 0; context->cur_attr--) |
---|
1263 | { |
---|
1264 | int pos = context->cur_attr; |
---|
1265 | g_free (context->attr_names[pos]); |
---|
1266 | g_free (context->attr_values[pos]); |
---|
1267 | context->attr_names[pos] = context->attr_values[pos] = NULL; |
---|
1268 | } |
---|
1269 | g_assert (context->cur_attr == -1); |
---|
1270 | g_assert (context->attr_names == NULL || |
---|
1271 | context->attr_names[0] == NULL); |
---|
1272 | g_assert (context->attr_values == NULL || |
---|
1273 | context->attr_values[0] == NULL); |
---|
1274 | |
---|
1275 | if (tmp_error != NULL) |
---|
1276 | { |
---|
1277 | mark_error (context, tmp_error); |
---|
1278 | g_propagate_error (error, tmp_error); |
---|
1279 | } |
---|
1280 | } |
---|
1281 | } |
---|
1282 | break; |
---|
1283 | |
---|
1284 | case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: |
---|
1285 | /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */ |
---|
1286 | |
---|
1287 | skip_spaces (context); |
---|
1288 | |
---|
1289 | if (context->iter != context->current_text_end) |
---|
1290 | { |
---|
1291 | if (*context->iter == '"') |
---|
1292 | { |
---|
1293 | advance_char (context); |
---|
1294 | context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ; |
---|
1295 | context->start = context->iter; |
---|
1296 | } |
---|
1297 | else if (*context->iter == '\'') |
---|
1298 | { |
---|
1299 | advance_char (context); |
---|
1300 | context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ; |
---|
1301 | context->start = context->iter; |
---|
1302 | } |
---|
1303 | else |
---|
1304 | { |
---|
1305 | gchar buf[7]; |
---|
1306 | set_error (context, |
---|
1307 | error, |
---|
1308 | G_MARKUP_ERROR_PARSE, |
---|
1309 | _("Odd character '%s', expected an open quote mark " |
---|
1310 | "after the equals sign when giving value for " |
---|
1311 | "attribute '%s' of element '%s'"), |
---|
1312 | utf8_str (context->iter, buf), |
---|
1313 | current_attribute (context), |
---|
1314 | current_element (context)); |
---|
1315 | } |
---|
1316 | } |
---|
1317 | break; |
---|
1318 | |
---|
1319 | case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: |
---|
1320 | case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: |
---|
1321 | /* Possible next states: BETWEEN_ATTRIBUTES */ |
---|
1322 | { |
---|
1323 | gchar delim; |
---|
1324 | |
---|
1325 | if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) |
---|
1326 | { |
---|
1327 | delim = '\''; |
---|
1328 | } |
---|
1329 | else |
---|
1330 | { |
---|
1331 | delim = '"'; |
---|
1332 | } |
---|
1333 | |
---|
1334 | do |
---|
1335 | { |
---|
1336 | if (*context->iter == delim) |
---|
1337 | break; |
---|
1338 | } |
---|
1339 | while (advance_char (context)); |
---|
1340 | } |
---|
1341 | if (context->iter == context->current_text_end) |
---|
1342 | { |
---|
1343 | /* The value hasn't necessarily ended. Merge with |
---|
1344 | * partial chunk, leave state unchanged. |
---|
1345 | */ |
---|
1346 | add_to_partial (context, context->start, context->iter); |
---|
1347 | } |
---|
1348 | else |
---|
1349 | { |
---|
1350 | /* The value has ended at the quote mark. Combine it |
---|
1351 | * with the partial chunk if any; set it for the current |
---|
1352 | * attribute. |
---|
1353 | */ |
---|
1354 | add_to_partial (context, context->start, context->iter); |
---|
1355 | |
---|
1356 | g_assert (context->cur_attr >= 0); |
---|
1357 | |
---|
1358 | if (unescape_text (context, |
---|
1359 | context->partial_chunk->str, |
---|
1360 | context->partial_chunk->str + |
---|
1361 | context->partial_chunk->len, |
---|
1362 | &context->attr_values[context->cur_attr], |
---|
1363 | error)) |
---|
1364 | { |
---|
1365 | /* success, advance past quote and set state. */ |
---|
1366 | advance_char (context); |
---|
1367 | context->state = STATE_BETWEEN_ATTRIBUTES; |
---|
1368 | context->start = NULL; |
---|
1369 | } |
---|
1370 | |
---|
1371 | truncate_partial (context); |
---|
1372 | } |
---|
1373 | break; |
---|
1374 | |
---|
1375 | case STATE_INSIDE_TEXT: |
---|
1376 | /* Possible next states: AFTER_OPEN_ANGLE */ |
---|
1377 | do |
---|
1378 | { |
---|
1379 | if (*context->iter == '<') |
---|
1380 | break; |
---|
1381 | } |
---|
1382 | while (advance_char (context)); |
---|
1383 | |
---|
1384 | /* The text hasn't necessarily ended. Merge with |
---|
1385 | * partial chunk, leave state unchanged. |
---|
1386 | */ |
---|
1387 | |
---|
1388 | add_to_partial (context, context->start, context->iter); |
---|
1389 | |
---|
1390 | if (context->iter != context->current_text_end) |
---|
1391 | { |
---|
1392 | gchar *unescaped = NULL; |
---|
1393 | |
---|
1394 | /* The text has ended at the open angle. Call the text |
---|
1395 | * callback. |
---|
1396 | */ |
---|
1397 | |
---|
1398 | if (unescape_text (context, |
---|
1399 | context->partial_chunk->str, |
---|
1400 | context->partial_chunk->str + |
---|
1401 | context->partial_chunk->len, |
---|
1402 | &unescaped, |
---|
1403 | error)) |
---|
1404 | { |
---|
1405 | GError *tmp_error = NULL; |
---|
1406 | |
---|
1407 | if (context->parser->text) |
---|
1408 | (*context->parser->text) (context, |
---|
1409 | unescaped, |
---|
1410 | strlen (unescaped), |
---|
1411 | context->user_data, |
---|
1412 | &tmp_error); |
---|
1413 | |
---|
1414 | g_free (unescaped); |
---|
1415 | |
---|
1416 | if (tmp_error == NULL) |
---|
1417 | { |
---|
1418 | /* advance past open angle and set state. */ |
---|
1419 | advance_char (context); |
---|
1420 | context->state = STATE_AFTER_OPEN_ANGLE; |
---|
1421 | /* could begin a passthrough */ |
---|
1422 | context->start = context->iter; |
---|
1423 | } |
---|
1424 | else |
---|
1425 | { |
---|
1426 | mark_error (context, tmp_error); |
---|
1427 | g_propagate_error (error, tmp_error); |
---|
1428 | } |
---|
1429 | } |
---|
1430 | |
---|
1431 | truncate_partial (context); |
---|
1432 | } |
---|
1433 | break; |
---|
1434 | |
---|
1435 | case STATE_AFTER_CLOSE_TAG_SLASH: |
---|
1436 | /* Possible next state: INSIDE_CLOSE_TAG_NAME */ |
---|
1437 | if (is_name_start_char (g_utf8_get_char (context->iter))) |
---|
1438 | { |
---|
1439 | context->state = STATE_INSIDE_CLOSE_TAG_NAME; |
---|
1440 | |
---|
1441 | /* start of tag name */ |
---|
1442 | context->start = context->iter; |
---|
1443 | } |
---|
1444 | else |
---|
1445 | { |
---|
1446 | gchar buf[7]; |
---|
1447 | set_error (context, |
---|
1448 | error, |
---|
1449 | G_MARKUP_ERROR_PARSE, |
---|
1450 | _("'%s' is not a valid character following " |
---|
1451 | "the characters '</'; '%s' may not begin an " |
---|
1452 | "element name"), |
---|
1453 | utf8_str (context->iter, buf), |
---|
1454 | utf8_str (context->iter, buf)); |
---|
1455 | } |
---|
1456 | break; |
---|
1457 | |
---|
1458 | case STATE_INSIDE_CLOSE_TAG_NAME: |
---|
1459 | /* Possible next state: AFTER_CLOSE_TAG_NAME */ |
---|
1460 | advance_to_name_end (context); |
---|
1461 | add_to_partial (context, context->start, context->iter); |
---|
1462 | |
---|
1463 | if (context->iter != context->current_text_end) |
---|
1464 | context->state = STATE_AFTER_CLOSE_TAG_NAME; |
---|
1465 | break; |
---|
1466 | |
---|
1467 | case STATE_AFTER_CLOSE_TAG_NAME: |
---|
1468 | /* Possible next state: AFTER_CLOSE_TAG_SLASH */ |
---|
1469 | |
---|
1470 | skip_spaces (context); |
---|
1471 | |
---|
1472 | if (context->iter != context->current_text_end) |
---|
1473 | { |
---|
1474 | gchar *close_name; |
---|
1475 | |
---|
1476 | /* The name has ended. Combine it with the partial chunk |
---|
1477 | * if any; check that it matches stack top and pop |
---|
1478 | * stack; invoke proper callback; enter next state. |
---|
1479 | */ |
---|
1480 | close_name = g_string_free (context->partial_chunk, FALSE); |
---|
1481 | context->partial_chunk = NULL; |
---|
1482 | |
---|
1483 | if (*context->iter != '>') |
---|
1484 | { |
---|
1485 | gchar buf[7]; |
---|
1486 | set_error (context, |
---|
1487 | error, |
---|
1488 | G_MARKUP_ERROR_PARSE, |
---|
1489 | _("'%s' is not a valid character following " |
---|
1490 | "the close element name '%s'; the allowed " |
---|
1491 | "character is '>'"), |
---|
1492 | utf8_str (context->iter, buf), |
---|
1493 | close_name); |
---|
1494 | } |
---|
1495 | else if (context->tag_stack == NULL) |
---|
1496 | { |
---|
1497 | set_error (context, |
---|
1498 | error, |
---|
1499 | G_MARKUP_ERROR_PARSE, |
---|
1500 | _("Element '%s' was closed, no element " |
---|
1501 | "is currently open"), |
---|
1502 | close_name); |
---|
1503 | } |
---|
1504 | else if (strcmp (close_name, current_element (context)) != 0) |
---|
1505 | { |
---|
1506 | set_error (context, |
---|
1507 | error, |
---|
1508 | G_MARKUP_ERROR_PARSE, |
---|
1509 | _("Element '%s' was closed, but the currently " |
---|
1510 | "open element is '%s'"), |
---|
1511 | close_name, |
---|
1512 | current_element (context)); |
---|
1513 | } |
---|
1514 | else |
---|
1515 | { |
---|
1516 | GError *tmp_error; |
---|
1517 | advance_char (context); |
---|
1518 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1519 | context->start = NULL; |
---|
1520 | |
---|
1521 | /* call the end_element callback */ |
---|
1522 | tmp_error = NULL; |
---|
1523 | if (context->parser->end_element) |
---|
1524 | (* context->parser->end_element) (context, |
---|
1525 | close_name, |
---|
1526 | context->user_data, |
---|
1527 | &tmp_error); |
---|
1528 | |
---|
1529 | |
---|
1530 | /* Pop the tag stack */ |
---|
1531 | g_free (context->tag_stack->data); |
---|
1532 | context->tag_stack = g_slist_delete_link (context->tag_stack, |
---|
1533 | context->tag_stack); |
---|
1534 | |
---|
1535 | if (tmp_error) |
---|
1536 | { |
---|
1537 | mark_error (context, tmp_error); |
---|
1538 | g_propagate_error (error, tmp_error); |
---|
1539 | } |
---|
1540 | } |
---|
1541 | |
---|
1542 | g_free (close_name); |
---|
1543 | } |
---|
1544 | break; |
---|
1545 | |
---|
1546 | case STATE_INSIDE_PASSTHROUGH: |
---|
1547 | /* Possible next state: AFTER_CLOSE_ANGLE */ |
---|
1548 | do |
---|
1549 | { |
---|
1550 | if (*context->iter == '<') |
---|
1551 | context->balance++; |
---|
1552 | if (*context->iter == '>') |
---|
1553 | { |
---|
1554 | context->balance--; |
---|
1555 | add_to_partial (context, context->start, context->iter); |
---|
1556 | context->start = context->iter; |
---|
1557 | if ((g_str_has_prefix (context->partial_chunk->str, "<?") |
---|
1558 | && g_str_has_suffix (context->partial_chunk->str, "?")) || |
---|
1559 | (g_str_has_prefix (context->partial_chunk->str, "<!--") |
---|
1560 | && g_str_has_suffix (context->partial_chunk->str, "--")) || |
---|
1561 | (g_str_has_prefix (context->partial_chunk->str, "<![CDATA[") |
---|
1562 | && g_str_has_suffix (context->partial_chunk->str, "]]")) || |
---|
1563 | (g_str_has_prefix (context->partial_chunk->str, "<!DOCTYPE") |
---|
1564 | && context->balance == 0)) |
---|
1565 | break; |
---|
1566 | } |
---|
1567 | } |
---|
1568 | while (advance_char (context)); |
---|
1569 | |
---|
1570 | if (context->iter == context->current_text_end) |
---|
1571 | { |
---|
1572 | /* The passthrough hasn't necessarily ended. Merge with |
---|
1573 | * partial chunk, leave state unchanged. |
---|
1574 | */ |
---|
1575 | add_to_partial (context, context->start, context->iter); |
---|
1576 | } |
---|
1577 | else |
---|
1578 | { |
---|
1579 | /* The passthrough has ended at the close angle. Combine |
---|
1580 | * it with the partial chunk if any. Call the passthrough |
---|
1581 | * callback. Note that the open/close angles are |
---|
1582 | * included in the text of the passthrough. |
---|
1583 | */ |
---|
1584 | GError *tmp_error = NULL; |
---|
1585 | |
---|
1586 | advance_char (context); /* advance past close angle */ |
---|
1587 | add_to_partial (context, context->start, context->iter); |
---|
1588 | |
---|
1589 | if (context->parser->passthrough) |
---|
1590 | (*context->parser->passthrough) (context, |
---|
1591 | context->partial_chunk->str, |
---|
1592 | context->partial_chunk->len, |
---|
1593 | context->user_data, |
---|
1594 | &tmp_error); |
---|
1595 | |
---|
1596 | truncate_partial (context); |
---|
1597 | |
---|
1598 | if (tmp_error == NULL) |
---|
1599 | { |
---|
1600 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1601 | context->start = context->iter; /* could begin text */ |
---|
1602 | } |
---|
1603 | else |
---|
1604 | { |
---|
1605 | mark_error (context, tmp_error); |
---|
1606 | g_propagate_error (error, tmp_error); |
---|
1607 | } |
---|
1608 | } |
---|
1609 | break; |
---|
1610 | |
---|
1611 | case STATE_ERROR: |
---|
1612 | goto finished; |
---|
1613 | break; |
---|
1614 | |
---|
1615 | default: |
---|
1616 | g_assert_not_reached (); |
---|
1617 | break; |
---|
1618 | } |
---|
1619 | } |
---|
1620 | |
---|
1621 | finished: |
---|
1622 | context->parsing = FALSE; |
---|
1623 | |
---|
1624 | return context->state != STATE_ERROR; |
---|
1625 | } |
---|
1626 | |
---|
1627 | /** |
---|
1628 | * g_markup_parse_context_end_parse: |
---|
1629 | * @context: a #GMarkupParseContext |
---|
1630 | * @error: return location for a #GError |
---|
1631 | * |
---|
1632 | * Signals to the #GMarkupParseContext that all data has been |
---|
1633 | * fed into the parse context with g_markup_parse_context_parse(). |
---|
1634 | * This function reports an error if the document isn't complete, |
---|
1635 | * for example if elements are still open. |
---|
1636 | * |
---|
1637 | * Return value: %TRUE on success, %FALSE if an error was set |
---|
1638 | **/ |
---|
1639 | gboolean |
---|
1640 | g_markup_parse_context_end_parse (GMarkupParseContext *context, |
---|
1641 | GError **error) |
---|
1642 | { |
---|
1643 | g_return_val_if_fail (context != NULL, FALSE); |
---|
1644 | g_return_val_if_fail (!context->parsing, FALSE); |
---|
1645 | g_return_val_if_fail (context->state != STATE_ERROR, FALSE); |
---|
1646 | |
---|
1647 | if (context->partial_chunk != NULL) |
---|
1648 | { |
---|
1649 | g_string_free (context->partial_chunk, TRUE); |
---|
1650 | context->partial_chunk = NULL; |
---|
1651 | } |
---|
1652 | |
---|
1653 | if (context->document_empty) |
---|
1654 | { |
---|
1655 | set_error (context, error, G_MARKUP_ERROR_EMPTY, |
---|
1656 | _("Document was empty or contained only whitespace")); |
---|
1657 | return FALSE; |
---|
1658 | } |
---|
1659 | |
---|
1660 | context->parsing = TRUE; |
---|
1661 | |
---|
1662 | switch (context->state) |
---|
1663 | { |
---|
1664 | case STATE_START: |
---|
1665 | /* Nothing to do */ |
---|
1666 | break; |
---|
1667 | |
---|
1668 | case STATE_AFTER_OPEN_ANGLE: |
---|
1669 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1670 | _("Document ended unexpectedly just after an open angle bracket '<'")); |
---|
1671 | break; |
---|
1672 | |
---|
1673 | case STATE_AFTER_CLOSE_ANGLE: |
---|
1674 | if (context->tag_stack != NULL) |
---|
1675 | { |
---|
1676 | /* Error message the same as for INSIDE_TEXT */ |
---|
1677 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1678 | _("Document ended unexpectedly with elements still open - " |
---|
1679 | "'%s' was the last element opened"), |
---|
1680 | current_element (context)); |
---|
1681 | } |
---|
1682 | break; |
---|
1683 | |
---|
1684 | case STATE_AFTER_ELISION_SLASH: |
---|
1685 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1686 | _("Document ended unexpectedly, expected to see a close angle " |
---|
1687 | "bracket ending the tag <%s/>"), current_element (context)); |
---|
1688 | break; |
---|
1689 | |
---|
1690 | case STATE_INSIDE_OPEN_TAG_NAME: |
---|
1691 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1692 | _("Document ended unexpectedly inside an element name")); |
---|
1693 | break; |
---|
1694 | |
---|
1695 | case STATE_INSIDE_ATTRIBUTE_NAME: |
---|
1696 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1697 | _("Document ended unexpectedly inside an attribute name")); |
---|
1698 | break; |
---|
1699 | |
---|
1700 | case STATE_BETWEEN_ATTRIBUTES: |
---|
1701 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1702 | _("Document ended unexpectedly inside an element-opening " |
---|
1703 | "tag.")); |
---|
1704 | break; |
---|
1705 | |
---|
1706 | case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: |
---|
1707 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1708 | _("Document ended unexpectedly after the equals sign " |
---|
1709 | "following an attribute name; no attribute value")); |
---|
1710 | break; |
---|
1711 | |
---|
1712 | case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: |
---|
1713 | case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: |
---|
1714 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1715 | _("Document ended unexpectedly while inside an attribute " |
---|
1716 | "value")); |
---|
1717 | break; |
---|
1718 | |
---|
1719 | case STATE_INSIDE_TEXT: |
---|
1720 | g_assert (context->tag_stack != NULL); |
---|
1721 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1722 | _("Document ended unexpectedly with elements still open - " |
---|
1723 | "'%s' was the last element opened"), |
---|
1724 | current_element (context)); |
---|
1725 | break; |
---|
1726 | |
---|
1727 | case STATE_AFTER_CLOSE_TAG_SLASH: |
---|
1728 | case STATE_INSIDE_CLOSE_TAG_NAME: |
---|
1729 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1730 | _("Document ended unexpectedly inside the close tag for " |
---|
1731 | "element '%s'"), current_element); |
---|
1732 | break; |
---|
1733 | |
---|
1734 | case STATE_INSIDE_PASSTHROUGH: |
---|
1735 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1736 | _("Document ended unexpectedly inside a comment or " |
---|
1737 | "processing instruction")); |
---|
1738 | break; |
---|
1739 | |
---|
1740 | case STATE_ERROR: |
---|
1741 | default: |
---|
1742 | g_assert_not_reached (); |
---|
1743 | break; |
---|
1744 | } |
---|
1745 | |
---|
1746 | context->parsing = FALSE; |
---|
1747 | |
---|
1748 | return context->state != STATE_ERROR; |
---|
1749 | } |
---|
1750 | |
---|
1751 | /** |
---|
1752 | * g_markup_parse_context_get_element: |
---|
1753 | * @context: a #GMarkupParseContext |
---|
1754 | * @returns: the name of the currently open element, or %NULL |
---|
1755 | * |
---|
1756 | * Retrieves the name of the currently open element. |
---|
1757 | * |
---|
1758 | * Since: 2.2 |
---|
1759 | **/ |
---|
1760 | G_CONST_RETURN gchar * |
---|
1761 | g_markup_parse_context_get_element (GMarkupParseContext *context) |
---|
1762 | { |
---|
1763 | g_return_val_if_fail (context != NULL, NULL); |
---|
1764 | |
---|
1765 | if (context->tag_stack == NULL) |
---|
1766 | return NULL; |
---|
1767 | else |
---|
1768 | return current_element (context); |
---|
1769 | } |
---|
1770 | |
---|
1771 | /** |
---|
1772 | * g_markup_parse_context_get_position: |
---|
1773 | * @context: a #GMarkupParseContext |
---|
1774 | * @line_number: return location for a line number, or %NULL |
---|
1775 | * @char_number: return location for a char-on-line number, or %NULL |
---|
1776 | * |
---|
1777 | * Retrieves the current line number and the number of the character on |
---|
1778 | * that line. Intended for use in error messages; there are no strict |
---|
1779 | * semantics for what constitutes the "current" line number other than |
---|
1780 | * "the best number we could come up with for error messages." |
---|
1781 | * |
---|
1782 | **/ |
---|
1783 | void |
---|
1784 | g_markup_parse_context_get_position (GMarkupParseContext *context, |
---|
1785 | gint *line_number, |
---|
1786 | gint *char_number) |
---|
1787 | { |
---|
1788 | g_return_if_fail (context != NULL); |
---|
1789 | |
---|
1790 | if (line_number) |
---|
1791 | *line_number = context->line_number; |
---|
1792 | |
---|
1793 | if (char_number) |
---|
1794 | *char_number = context->char_number; |
---|
1795 | } |
---|
1796 | |
---|
1797 | static void |
---|
1798 | append_escaped_text (GString *str, |
---|
1799 | const gchar *text, |
---|
1800 | gssize length) |
---|
1801 | { |
---|
1802 | const gchar *p; |
---|
1803 | const gchar *end; |
---|
1804 | |
---|
1805 | p = text; |
---|
1806 | end = text + length; |
---|
1807 | |
---|
1808 | while (p != end) |
---|
1809 | { |
---|
1810 | const gchar *next; |
---|
1811 | next = g_utf8_next_char (p); |
---|
1812 | |
---|
1813 | switch (*p) |
---|
1814 | { |
---|
1815 | case '&': |
---|
1816 | g_string_append (str, "&"); |
---|
1817 | break; |
---|
1818 | |
---|
1819 | case '<': |
---|
1820 | g_string_append (str, "<"); |
---|
1821 | break; |
---|
1822 | |
---|
1823 | case '>': |
---|
1824 | g_string_append (str, ">"); |
---|
1825 | break; |
---|
1826 | |
---|
1827 | case '\'': |
---|
1828 | g_string_append (str, "'"); |
---|
1829 | break; |
---|
1830 | |
---|
1831 | case '"': |
---|
1832 | g_string_append (str, """); |
---|
1833 | break; |
---|
1834 | |
---|
1835 | default: |
---|
1836 | g_string_append_len (str, p, next - p); |
---|
1837 | break; |
---|
1838 | } |
---|
1839 | |
---|
1840 | p = next; |
---|
1841 | } |
---|
1842 | } |
---|
1843 | |
---|
1844 | /** |
---|
1845 | * g_markup_escape_text: |
---|
1846 | * @text: some valid UTF-8 text |
---|
1847 | * @length: length of @text in bytes |
---|
1848 | * |
---|
1849 | * Escapes text so that the markup parser will parse it verbatim. |
---|
1850 | * Less than, greater than, ampersand, etc. are replaced with the |
---|
1851 | * corresponding entities. This function would typically be used |
---|
1852 | * when writing out a file to be parsed with the markup parser. |
---|
1853 | * |
---|
1854 | * Note that this function doesn't protect whitespace and line endings |
---|
1855 | * from being processed according to the XML rules for normalization |
---|
1856 | * of line endings and attribute values. |
---|
1857 | * |
---|
1858 | * Return value: escaped text |
---|
1859 | **/ |
---|
1860 | gchar* |
---|
1861 | g_markup_escape_text (const gchar *text, |
---|
1862 | gssize length) |
---|
1863 | { |
---|
1864 | GString *str; |
---|
1865 | |
---|
1866 | g_return_val_if_fail (text != NULL, NULL); |
---|
1867 | |
---|
1868 | if (length < 0) |
---|
1869 | length = strlen (text); |
---|
1870 | |
---|
1871 | str = g_string_new (NULL); |
---|
1872 | append_escaped_text (str, text, length); |
---|
1873 | |
---|
1874 | return g_string_free (str, FALSE); |
---|
1875 | } |
---|
1876 | |
---|
/* Skips a run of ASCII decimal digits starting at @cp and returns a
 * pointer to the first character that is not a digit.
 */
static const char *
conversion_skip_digits (const char *cp)
{
  while (*cp >= '0' && *cp <= '9')
    cp++;

  return cp;
}

/* If @cp points at a positional-argument marker (one or more digits
 * followed by '$'), returns a pointer just past the '$'; otherwise
 * returns @cp unchanged.
 */
static const char *
conversion_skip_position (const char *cp)
{
  const char *np = conversion_skip_digits (cp);

  if (np != cp && *np == '$')
    return np + 1;

  return cp;
}

/**
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *         the returned conversion. On a %NULL return, returns the
 *         pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * A conversion is a '%' followed by an optional positional argument,
 * flags, field width, precision and size modifiers, and finally one
 * conversion character ("%%" is treated as a conversion as well).
 * A '%' whose conversion is cut short by the end of the string is
 * not reported as a conversion.
 *
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * Return value: pointer to the next conversion in @format,
 *  or %NULL, if none.
 **/
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  /* A lone '%' at the very end of the string is not a conversion. */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument. */
  cp = conversion_skip_position (cp);

  /* Skip the flags. */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width. */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument. */
      cp = conversion_skip_position (cp);
    }
  else
    {
      cp = conversion_skip_digits (cp);
    }

  /* Skip the precision. */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself; without this, "%.*s" would be
           * cut off after the '*' and the 's' treated as literal text.
           */
          cp++;

          /* Test for positional argument. */
          cp = conversion_skip_position (cp);
        }
      else
        {
          cp = conversion_skip_digits (cp);
        }
    }

  /* Skip argument type/size specifiers. */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* The string ended before the conversion character: report "no
   * conversion" rather than stepping past the terminating NUL.
   */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Skip the conversion character. */
  cp++;

  *after = cp;
  return start;
}
---|
2002 | |
---|
2003 | /** |
---|
2004 | * g_markup_vprintf_escaped: |
---|
2005 | * @format: printf() style format string |
---|
2006 | * @args: variable argument list, similar to vprintf() |
---|
2007 | * |
---|
2008 | * Formats the data in @args according to @format, escaping |
---|
2009 | * all string and character arguments in the fashion |
---|
2010 | * of g_markup_escape_text(). See g_markup_printf_escaped(). |
---|
2011 | * |
---|
2012 | * Return value: newly allocated result from formatting |
---|
2013 | * operation. Free with g_free(). |
---|
2014 | * |
---|
2015 | * Since: 2.4 |
---|
2016 | **/ |
---|
2017 | char * |
---|
2018 | g_markup_vprintf_escaped (const char *format, |
---|
2019 | va_list args) |
---|
2020 | { |
---|
2021 | GString *format1; |
---|
2022 | GString *format2; |
---|
2023 | GString *result = NULL; |
---|
2024 | gchar *output1 = NULL; |
---|
2025 | gchar *output2 = NULL; |
---|
2026 | const char *p, *op1, *op2; |
---|
2027 | va_list args2; |
---|
2028 | |
---|
2029 | /* The technique here, is that we make two format strings that |
---|
2030 | * have the identical conversions in the identical order to the |
---|
2031 | * original strings, but differ in the text in-between. We |
---|
2032 | * then use the normal g_strdup_vprintf() to format the arguments |
---|
2033 | * with the two new format strings. By comparing the results, |
---|
2034 | * we can figure out what segments of the output come from |
---|
2035 | * the the original format string, and what from the arguments, |
---|
2036 | * and thus know what portions of the string to escape. |
---|
2037 | * |
---|
2038 | * For instance, for: |
---|
2039 | * |
---|
2040 | * g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5); |
---|
2041 | * |
---|
2042 | * We form the two format strings "%sX%dX" and %sY%sY". The results |
---|
2043 | * of formatting with those two strings are |
---|
2044 | * |
---|
2045 | * "%sX%dX" => "Susan & FredX5X" |
---|
2046 | * "%sY%dY" => "Susan & FredY5Y" |
---|
2047 | * |
---|
2048 | * To find the span of the first argument, we find the first position |
---|
2049 | * where the two arguments differ, which tells us that the first |
---|
2050 | * argument formatted to "Susan & Fred". We then escape that |
---|
2051 | * to "Susan & Fred" and join up with the intermediate portions |
---|
2052 | * of the format string and the second argument to get |
---|
2053 | * "Susan & Fred ate 5 apples". |
---|
2054 | */ |
---|
2055 | |
---|
2056 | /* Create the two modified format strings |
---|
2057 | */ |
---|
2058 | format1 = g_string_new (NULL); |
---|
2059 | format2 = g_string_new (NULL); |
---|
2060 | p = format; |
---|
2061 | while (TRUE) |
---|
2062 | { |
---|
2063 | const char *after; |
---|
2064 | const char *conv = find_conversion (p, &after); |
---|
2065 | if (!conv) |
---|
2066 | break; |
---|
2067 | |
---|
2068 | g_string_append_len (format1, conv, after - conv); |
---|
2069 | g_string_append_c (format1, 'X'); |
---|
2070 | g_string_append_len (format2, conv, after - conv); |
---|
2071 | g_string_append_c (format2, 'Y'); |
---|
2072 | |
---|
2073 | p = after; |
---|
2074 | } |
---|
2075 | |
---|
2076 | /* Use them to format the arguments |
---|
2077 | */ |
---|
2078 | G_VA_COPY (args2, args); |
---|
2079 | |
---|
2080 | output1 = g_strdup_vprintf (format1->str, args); |
---|
2081 | va_end (args); |
---|
2082 | if (!output1) |
---|
2083 | goto cleanup; |
---|
2084 | |
---|
2085 | output2 = g_strdup_vprintf (format2->str, args2); |
---|
2086 | va_end (args2); |
---|
2087 | if (!output2) |
---|
2088 | goto cleanup; |
---|
2089 | |
---|
2090 | result = g_string_new (NULL); |
---|
2091 | |
---|
2092 | /* Iterate through the original format string again, |
---|
2093 | * copying the non-conversion portions and the escaped |
---|
2094 | * converted arguments to the output string. |
---|
2095 | */ |
---|
2096 | op1 = output1; |
---|
2097 | op2 = output2; |
---|
2098 | p = format; |
---|
2099 | while (TRUE) |
---|
2100 | { |
---|
2101 | const char *after; |
---|
2102 | const char *output_start; |
---|
2103 | const char *conv = find_conversion (p, &after); |
---|
2104 | char *escaped; |
---|
2105 | |
---|
2106 | if (!conv) /* The end, after points to the trailing \0 */ |
---|
2107 | { |
---|
2108 | g_string_append_len (result, p, after - p); |
---|
2109 | break; |
---|
2110 | } |
---|
2111 | |
---|
2112 | g_string_append_len (result, p, conv - p); |
---|
2113 | output_start = op1; |
---|
2114 | while (*op1 == *op2) |
---|
2115 | { |
---|
2116 | op1++; |
---|
2117 | op2++; |
---|
2118 | } |
---|
2119 | |
---|
2120 | escaped = g_markup_escape_text (output_start, op1 - output_start); |
---|
2121 | g_string_append (result, escaped); |
---|
2122 | g_free (escaped); |
---|
2123 | |
---|
2124 | p = after; |
---|
2125 | op1++; |
---|
2126 | op2++; |
---|
2127 | } |
---|
2128 | |
---|
2129 | cleanup: |
---|
2130 | g_string_free (format1, TRUE); |
---|
2131 | g_string_free (format2, TRUE); |
---|
2132 | g_free (output1); |
---|
2133 | g_free (output2); |
---|
2134 | |
---|
2135 | if (result) |
---|
2136 | return g_string_free (result, FALSE); |
---|
2137 | else |
---|
2138 | return NULL; |
---|
2139 | } |
---|
2140 | |
---|
/**
 * g_markup_printf_escaped:
 * @format: printf() style format string
 * @Varargs: the arguments to insert in the format string
 *
 * Formats arguments according to @format, escaping
 * all string and character arguments in the fashion
 * of g_markup_escape_text(). This is useful when you
 * want to insert literal strings into XML-style markup
 * output, without having to worry that the strings
 * might themselves contain markup.
 *
 * This is the variadic front end to g_markup_vprintf_escaped().
 *
 * <informalexample><programlisting>
 * const char *store = "Fortnum &amp; Mason";
 * const char *item = "Tea";
 * char *output;
 * &nbsp;
 * output = g_markup_printf_escaped ("&lt;purchase&gt;"
 *                                   "&lt;store&gt;%s&lt;/store&gt;"
 *                                   "&lt;item&gt;%s&lt;/item&gt;"
 *                                   "&lt;/purchase&gt;",
 *                                   store, item);
 * </programlisting></informalexample>
 *
 * Return value: newly allocated result from formatting
 *  operation. Free with g_free().
 *
 * Since: 2.4
 **/
char *
g_markup_printf_escaped (const char *format, ...)
{
  va_list ap;
  char *escaped;

  /* Collect the variadic arguments and let the va_list variant do
   * all of the formatting and escaping work.
   */
  va_start (ap, format);
  escaped = g_markup_vprintf_escaped (format, ap);
  va_end (ap);

  return escaped;
}
---|