1 | /* gmarkup.c - Simple XML-like parser |
---|
2 | * |
---|
3 | * Copyright 2000 Red Hat, Inc. |
---|
4 | * |
---|
5 | * GLib is free software; you can redistribute it and/or modify it |
---|
6 | * under the terms of the GNU Lesser General Public License as |
---|
7 | * published by the Free Software Foundation; either version 2 of the |
---|
8 | * License, or (at your option) any later version. |
---|
9 | * |
---|
10 | * GLib is distributed in the hope that it will be useful, |
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | * Lesser General Public License for more details. |
---|
14 | * |
---|
15 | * You should have received a copy of the GNU Lesser General Public |
---|
16 | * License along with GLib; see the file COPYING.LIB. If not, |
---|
17 | * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
---|
18 | * Boston, MA 02111-1307, USA. |
---|
19 | */ |
---|
20 | |
---|
21 | #include "config.h" |
---|
22 | |
---|
23 | #include <string.h> |
---|
24 | #include <stdio.h> |
---|
25 | #include <stdlib.h> |
---|
26 | #include <errno.h> |
---|
27 | |
---|
28 | #include "glib.h" |
---|
29 | |
---|
30 | #include "glibintl.h" |
---|
31 | |
---|
32 | GQuark |
---|
33 | g_markup_error_quark (void) |
---|
34 | { |
---|
35 | static GQuark error_quark = 0; |
---|
36 | |
---|
37 | if (error_quark == 0) |
---|
38 | error_quark = g_quark_from_static_string ("g-markup-error-quark"); |
---|
39 | |
---|
40 | return error_quark; |
---|
41 | } |
---|
42 | |
---|
/* States of the main parser state machine, stored in the "state" field
 * of GMarkupParseContext.
 */
typedef enum
{
  STATE_START,                /* initial state of a new context; only '<' (after optional whitespace) is accepted */
  STATE_AFTER_OPEN_ANGLE,     /* a '<' has just been consumed */
  STATE_AFTER_CLOSE_ANGLE,    /* leads to STATE_START when the tag stack is empty, else to STATE_INSIDE_TEXT */
  STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
  STATE_INSIDE_OPEN_TAG_NAME, /* entered when a name-start character follows '<' */
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH, /* entered when '/' follows '<' */
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,   /* entered when '?' or '!' follows '<' */
  STATE_ERROR                 /* set by mark_error(); the context accepts no further input */
} GMarkupParseState;
---|
61 | |
---|
/* Private state of one parse context.  Created by
 * g_markup_parse_context_new() and released by
 * g_markup_parse_context_free().
 */
struct _GMarkupParseContext
{
  /* Callback table supplied by the caller; not copied */
  const GMarkupParser *parser;

  GMarkupParseFlags flags;

  /* Position reported in error messages; both start at 1 */
  gint line_number;
  gint char_number;

  /* Passed to the parser callbacks; dnotify (if set) is invoked on
   * user_data when the context is freed.
   */
  gpointer user_data;
  GDestroyNotify dnotify;

  /* A piece of character data or an element that
   * hasn't "ended" yet so we haven't yet called
   * the callback for it.
   */
  GString *partial_chunk;

  GMarkupParseState state;
  /* List whose data pointers are released with g_free() when the
   * context is freed; the head is the current element.
   */
  GSList *tag_stack;
  /* Parallel NULL-terminated arrays maintained by add_attribute();
   * cur_attr is the index of the most recently added attribute
   * (-1 when there is none) and alloc_attrs the number of slots
   * allocated in each array.
   */
  gchar **attr_names;
  gchar **attr_values;
  gint cur_attr;
  gint alloc_attrs;

  /* The chunk currently being parsed.  current_text_end may lie before
   * current_text + current_text_len when the chunk ends in an
   * incomplete character.
   */
  const gchar *current_text;
  gssize       current_text_len;
  const gchar *current_text_end;

  /* Bytes of an incomplete trailing character, held until the next
   * chunk supplies the rest; NULL when there is none.
   */
  GString *leftover_char_portion;

  /* used to save the start of the last interesting thingy */
  const gchar *start;

  /* Current read position inside current_text */
  const gchar *iter;

  /* TRUE until the first '<' is seen in STATE_START */
  guint document_empty : 1;
  /* TRUE while g_markup_parse_context_parse() is running; checked by
   * g_markup_parse_context_free() and by the parse function itself.
   */
  guint parsing : 1;
  /* Set to 1 when a passthrough begins -- NOTE(review): presumably
   * tracks '<'/'>' nesting inside the passthrough; confirm against
   * the STATE_INSIDE_PASSTHROUGH handling.
   */
  gint balance;
};
---|
102 | |
---|
103 | /** |
---|
104 | * g_markup_parse_context_new: |
---|
105 | * @parser: a #GMarkupParser |
---|
106 | * @flags: one or more #GMarkupParseFlags |
---|
107 | * @user_data: user data to pass to #GMarkupParser functions |
---|
108 | * @user_data_dnotify: user data destroy notifier called when the parse context is freed |
---|
109 | * |
---|
110 | * Creates a new parse context. A parse context is used to parse |
---|
111 | * marked-up documents. You can feed any number of documents into |
---|
112 | * a context, as long as no errors occur; once an error occurs, |
---|
113 | * the parse context can't continue to parse text (you have to free it |
---|
114 | * and create a new parse context). |
---|
115 | * |
---|
116 | * Return value: a new #GMarkupParseContext |
---|
117 | **/ |
---|
118 | GMarkupParseContext * |
---|
119 | g_markup_parse_context_new (const GMarkupParser *parser, |
---|
120 | GMarkupParseFlags flags, |
---|
121 | gpointer user_data, |
---|
122 | GDestroyNotify user_data_dnotify) |
---|
123 | { |
---|
124 | GMarkupParseContext *context; |
---|
125 | |
---|
126 | g_return_val_if_fail (parser != NULL, NULL); |
---|
127 | |
---|
128 | context = g_new (GMarkupParseContext, 1); |
---|
129 | |
---|
130 | context->parser = parser; |
---|
131 | context->flags = flags; |
---|
132 | context->user_data = user_data; |
---|
133 | context->dnotify = user_data_dnotify; |
---|
134 | |
---|
135 | context->line_number = 1; |
---|
136 | context->char_number = 1; |
---|
137 | |
---|
138 | context->partial_chunk = NULL; |
---|
139 | |
---|
140 | context->state = STATE_START; |
---|
141 | context->tag_stack = NULL; |
---|
142 | context->attr_names = NULL; |
---|
143 | context->attr_values = NULL; |
---|
144 | context->cur_attr = -1; |
---|
145 | context->alloc_attrs = 0; |
---|
146 | |
---|
147 | context->current_text = NULL; |
---|
148 | context->current_text_len = -1; |
---|
149 | context->current_text_end = NULL; |
---|
150 | context->leftover_char_portion = NULL; |
---|
151 | |
---|
152 | context->start = NULL; |
---|
153 | context->iter = NULL; |
---|
154 | |
---|
155 | context->document_empty = TRUE; |
---|
156 | context->parsing = FALSE; |
---|
157 | |
---|
158 | context->balance = 0; |
---|
159 | |
---|
160 | return context; |
---|
161 | } |
---|
162 | |
---|
163 | /** |
---|
164 | * g_markup_parse_context_free: |
---|
165 | * @context: a #GMarkupParseContext |
---|
166 | * |
---|
167 | * Frees a #GMarkupParseContext. Can't be called from inside |
---|
168 | * one of the #GMarkupParser functions. |
---|
169 | * |
---|
170 | **/ |
---|
171 | void |
---|
172 | g_markup_parse_context_free (GMarkupParseContext *context) |
---|
173 | { |
---|
174 | g_return_if_fail (context != NULL); |
---|
175 | g_return_if_fail (!context->parsing); |
---|
176 | |
---|
177 | if (context->dnotify) |
---|
178 | (* context->dnotify) (context->user_data); |
---|
179 | |
---|
180 | g_strfreev (context->attr_names); |
---|
181 | g_strfreev (context->attr_values); |
---|
182 | |
---|
183 | g_slist_foreach (context->tag_stack, (GFunc)g_free, NULL); |
---|
184 | g_slist_free (context->tag_stack); |
---|
185 | |
---|
186 | if (context->partial_chunk) |
---|
187 | g_string_free (context->partial_chunk, TRUE); |
---|
188 | |
---|
189 | if (context->leftover_char_portion) |
---|
190 | g_string_free (context->leftover_char_portion, TRUE); |
---|
191 | |
---|
192 | g_free (context); |
---|
193 | } |
---|
194 | |
---|
195 | static void |
---|
196 | mark_error (GMarkupParseContext *context, |
---|
197 | GError *error) |
---|
198 | { |
---|
199 | context->state = STATE_ERROR; |
---|
200 | |
---|
201 | if (context->parser->error) |
---|
202 | (*context->parser->error) (context, error, context->user_data); |
---|
203 | } |
---|
204 | |
---|
205 | static void |
---|
206 | set_error (GMarkupParseContext *context, |
---|
207 | GError **error, |
---|
208 | GMarkupError code, |
---|
209 | const gchar *format, |
---|
210 | ...) |
---|
211 | { |
---|
212 | GError *tmp_error; |
---|
213 | gchar *s; |
---|
214 | va_list args; |
---|
215 | |
---|
216 | va_start (args, format); |
---|
217 | s = g_strdup_vprintf (format, args); |
---|
218 | va_end (args); |
---|
219 | |
---|
220 | tmp_error = g_error_new (G_MARKUP_ERROR, |
---|
221 | code, |
---|
222 | _("Error on line %d char %d: %s"), |
---|
223 | context->line_number, |
---|
224 | context->char_number, |
---|
225 | s); |
---|
226 | |
---|
227 | g_free (s); |
---|
228 | |
---|
229 | mark_error (context, tmp_error); |
---|
230 | |
---|
231 | g_propagate_error (error, tmp_error); |
---|
232 | } |
---|
233 | |
---|
234 | static gboolean |
---|
235 | is_name_start_char (gunichar c) |
---|
236 | { |
---|
237 | if (g_unichar_isalpha (c) || |
---|
238 | c == '_' || |
---|
239 | c == ':') |
---|
240 | return TRUE; |
---|
241 | else |
---|
242 | return FALSE; |
---|
243 | } |
---|
244 | |
---|
245 | static gboolean |
---|
246 | is_name_char (gunichar c) |
---|
247 | { |
---|
248 | if (g_unichar_isalnum (c) || |
---|
249 | c == '.' || |
---|
250 | c == '-' || |
---|
251 | c == '_' || |
---|
252 | c == ':') |
---|
253 | return TRUE; |
---|
254 | else |
---|
255 | return FALSE; |
---|
256 | } |
---|
257 | |
---|
258 | |
---|
259 | static gchar* |
---|
260 | char_str (gunichar c, |
---|
261 | gchar *buf) |
---|
262 | { |
---|
263 | memset (buf, 0, 7); |
---|
264 | g_unichar_to_utf8 (c, buf); |
---|
265 | return buf; |
---|
266 | } |
---|
267 | |
---|
268 | static gchar* |
---|
269 | utf8_str (const gchar *utf8, |
---|
270 | gchar *buf) |
---|
271 | { |
---|
272 | char_str (g_utf8_get_char (utf8), buf); |
---|
273 | return buf; |
---|
274 | } |
---|
275 | |
---|
276 | static void |
---|
277 | set_unescape_error (GMarkupParseContext *context, |
---|
278 | GError **error, |
---|
279 | const gchar *remaining_text, |
---|
280 | const gchar *remaining_text_end, |
---|
281 | GMarkupError code, |
---|
282 | const gchar *format, |
---|
283 | ...) |
---|
284 | { |
---|
285 | GError *tmp_error; |
---|
286 | gchar *s; |
---|
287 | va_list args; |
---|
288 | gint remaining_newlines; |
---|
289 | const gchar *p; |
---|
290 | |
---|
291 | remaining_newlines = 0; |
---|
292 | p = remaining_text; |
---|
293 | while (p != remaining_text_end) |
---|
294 | { |
---|
295 | if (*p == '\n') |
---|
296 | ++remaining_newlines; |
---|
297 | ++p; |
---|
298 | } |
---|
299 | |
---|
300 | va_start (args, format); |
---|
301 | s = g_strdup_vprintf (format, args); |
---|
302 | va_end (args); |
---|
303 | |
---|
304 | tmp_error = g_error_new (G_MARKUP_ERROR, |
---|
305 | code, |
---|
306 | _("Error on line %d: %s"), |
---|
307 | context->line_number - remaining_newlines, |
---|
308 | s); |
---|
309 | |
---|
310 | g_free (s); |
---|
311 | |
---|
312 | mark_error (context, tmp_error); |
---|
313 | |
---|
314 | g_propagate_error (error, tmp_error); |
---|
315 | } |
---|
316 | |
---|
/* States of the small state machine used by unescape_text() */
typedef enum
{
  USTATE_INSIDE_TEXT,         /* copying literal text up to the next '&' */
  USTATE_AFTER_AMPERSAND,     /* just past '&'; '#' or a name-start character is expected */
  USTATE_INSIDE_ENTITY_NAME,  /* scanning an entity name up to ';' */
  USTATE_AFTER_CHARREF_HASH   /* just past "&#"; scanning a numeric character reference up to ';' */
} UnescapeState;
---|
324 | |
---|
325 | static gboolean |
---|
326 | unescape_text (GMarkupParseContext *context, |
---|
327 | const gchar *text, |
---|
328 | const gchar *text_end, |
---|
329 | gchar **unescaped, |
---|
330 | GError **error) |
---|
331 | { |
---|
332 | #define MAX_ENT_LEN 5 |
---|
333 | GString *str; |
---|
334 | const gchar *p; |
---|
335 | UnescapeState state; |
---|
336 | const gchar *start; |
---|
337 | |
---|
338 | str = g_string_new (""); |
---|
339 | |
---|
340 | state = USTATE_INSIDE_TEXT; |
---|
341 | p = text; |
---|
342 | start = p; |
---|
343 | while (p != text_end && context->state != STATE_ERROR) |
---|
344 | { |
---|
345 | g_assert (p < text_end); |
---|
346 | |
---|
347 | switch (state) |
---|
348 | { |
---|
349 | case USTATE_INSIDE_TEXT: |
---|
350 | { |
---|
351 | while (p != text_end && *p != '&') |
---|
352 | p = g_utf8_next_char (p); |
---|
353 | |
---|
354 | if (p != start) |
---|
355 | { |
---|
356 | g_string_append_len (str, start, p - start); |
---|
357 | |
---|
358 | start = NULL; |
---|
359 | } |
---|
360 | |
---|
361 | if (p != text_end && *p == '&') |
---|
362 | { |
---|
363 | p = g_utf8_next_char (p); |
---|
364 | state = USTATE_AFTER_AMPERSAND; |
---|
365 | } |
---|
366 | } |
---|
367 | break; |
---|
368 | |
---|
369 | case USTATE_AFTER_AMPERSAND: |
---|
370 | { |
---|
371 | if (*p == '#') |
---|
372 | { |
---|
373 | p = g_utf8_next_char (p); |
---|
374 | |
---|
375 | start = p; |
---|
376 | state = USTATE_AFTER_CHARREF_HASH; |
---|
377 | } |
---|
378 | else if (!is_name_start_char (g_utf8_get_char (p))) |
---|
379 | { |
---|
380 | if (*p == ';') |
---|
381 | { |
---|
382 | set_unescape_error (context, error, |
---|
383 | p, text_end, |
---|
384 | G_MARKUP_ERROR_PARSE, |
---|
385 | _("Empty entity '&;' seen; valid " |
---|
386 | "entities are: & " < > '")); |
---|
387 | } |
---|
388 | else |
---|
389 | { |
---|
390 | gchar buf[7]; |
---|
391 | |
---|
392 | set_unescape_error (context, error, |
---|
393 | p, text_end, |
---|
394 | G_MARKUP_ERROR_PARSE, |
---|
395 | _("Character '%s' is not valid at " |
---|
396 | "the start of an entity name; " |
---|
397 | "the & character begins an entity; " |
---|
398 | "if this ampersand isn't supposed " |
---|
399 | "to be an entity, escape it as " |
---|
400 | "&"), |
---|
401 | utf8_str (p, buf)); |
---|
402 | } |
---|
403 | } |
---|
404 | else |
---|
405 | { |
---|
406 | start = p; |
---|
407 | state = USTATE_INSIDE_ENTITY_NAME; |
---|
408 | } |
---|
409 | } |
---|
410 | break; |
---|
411 | |
---|
412 | |
---|
413 | case USTATE_INSIDE_ENTITY_NAME: |
---|
414 | { |
---|
415 | gchar buf[MAX_ENT_LEN+1] = { |
---|
416 | '\0', '\0', '\0', '\0', '\0', '\0' |
---|
417 | }; |
---|
418 | gchar *dest; |
---|
419 | |
---|
420 | while (p != text_end) |
---|
421 | { |
---|
422 | if (*p == ';') |
---|
423 | break; |
---|
424 | else if (!is_name_char (*p)) |
---|
425 | { |
---|
426 | gchar ubuf[7]; |
---|
427 | |
---|
428 | set_unescape_error (context, error, |
---|
429 | p, text_end, |
---|
430 | G_MARKUP_ERROR_PARSE, |
---|
431 | _("Character '%s' is not valid " |
---|
432 | "inside an entity name"), |
---|
433 | utf8_str (p, ubuf)); |
---|
434 | break; |
---|
435 | } |
---|
436 | |
---|
437 | p = g_utf8_next_char (p); |
---|
438 | } |
---|
439 | |
---|
440 | if (context->state != STATE_ERROR) |
---|
441 | { |
---|
442 | if (p != text_end) |
---|
443 | { |
---|
444 | const gchar *src; |
---|
445 | |
---|
446 | src = start; |
---|
447 | dest = buf; |
---|
448 | while (src != p) |
---|
449 | { |
---|
450 | *dest = *src; |
---|
451 | ++dest; |
---|
452 | ++src; |
---|
453 | } |
---|
454 | |
---|
455 | /* move to after semicolon */ |
---|
456 | p = g_utf8_next_char (p); |
---|
457 | start = p; |
---|
458 | state = USTATE_INSIDE_TEXT; |
---|
459 | |
---|
460 | if (strcmp (buf, "lt") == 0) |
---|
461 | g_string_append_c (str, '<'); |
---|
462 | else if (strcmp (buf, "gt") == 0) |
---|
463 | g_string_append_c (str, '>'); |
---|
464 | else if (strcmp (buf, "amp") == 0) |
---|
465 | g_string_append_c (str, '&'); |
---|
466 | else if (strcmp (buf, "quot") == 0) |
---|
467 | g_string_append_c (str, '"'); |
---|
468 | else if (strcmp (buf, "apos") == 0) |
---|
469 | g_string_append_c (str, '\''); |
---|
470 | else |
---|
471 | { |
---|
472 | set_unescape_error (context, error, |
---|
473 | p, text_end, |
---|
474 | G_MARKUP_ERROR_PARSE, |
---|
475 | _("Entity name '%s' is not known"), |
---|
476 | buf); |
---|
477 | } |
---|
478 | } |
---|
479 | else |
---|
480 | { |
---|
481 | set_unescape_error (context, error, |
---|
482 | /* give line number of the & */ |
---|
483 | start, text_end, |
---|
484 | G_MARKUP_ERROR_PARSE, |
---|
485 | _("Entity did not end with a semicolon; " |
---|
486 | "most likely you used an ampersand " |
---|
487 | "character without intending to start " |
---|
488 | "an entity - escape ampersand as &")); |
---|
489 | } |
---|
490 | } |
---|
491 | } |
---|
492 | break; |
---|
493 | |
---|
494 | case USTATE_AFTER_CHARREF_HASH: |
---|
495 | { |
---|
496 | gboolean is_hex = FALSE; |
---|
497 | if (*p == 'x') |
---|
498 | { |
---|
499 | is_hex = TRUE; |
---|
500 | p = g_utf8_next_char (p); |
---|
501 | start = p; |
---|
502 | } |
---|
503 | |
---|
504 | while (p != text_end && *p != ';') |
---|
505 | p = g_utf8_next_char (p); |
---|
506 | |
---|
507 | if (p != text_end) |
---|
508 | { |
---|
509 | g_assert (*p == ';'); |
---|
510 | |
---|
511 | /* digit is between start and p */ |
---|
512 | |
---|
513 | if (start != p) |
---|
514 | { |
---|
515 | gchar *digit = g_strndup (start, p - start); |
---|
516 | gulong l; |
---|
517 | gchar *end = NULL; |
---|
518 | gchar *digit_end = digit + (p - start); |
---|
519 | |
---|
520 | errno = 0; |
---|
521 | if (is_hex) |
---|
522 | l = strtoul (digit, &end, 16); |
---|
523 | else |
---|
524 | l = strtoul (digit, &end, 10); |
---|
525 | |
---|
526 | if (end != digit_end || errno != 0) |
---|
527 | { |
---|
528 | set_unescape_error (context, error, |
---|
529 | start, text_end, |
---|
530 | G_MARKUP_ERROR_PARSE, |
---|
531 | _("Failed to parse '%s', which " |
---|
532 | "should have been a digit " |
---|
533 | "inside a character reference " |
---|
534 | "(ê for example) - perhaps " |
---|
535 | "the digit is too large"), |
---|
536 | digit); |
---|
537 | } |
---|
538 | else |
---|
539 | { |
---|
540 | /* characters XML permits */ |
---|
541 | if (l == 0x9 || |
---|
542 | l == 0xA || |
---|
543 | l == 0xD || |
---|
544 | (l >= 0x20 && l <= 0xD7FF) || |
---|
545 | (l >= 0xE000 && l <= 0xFFFD) || |
---|
546 | (l >= 0x10000 && l <= 0x10FFFF)) |
---|
547 | { |
---|
548 | gchar buf[7]; |
---|
549 | g_string_append (str, char_str (l, buf)); |
---|
550 | } |
---|
551 | else |
---|
552 | { |
---|
553 | set_unescape_error (context, error, |
---|
554 | start, text_end, |
---|
555 | G_MARKUP_ERROR_PARSE, |
---|
556 | _("Character reference '%s' does not encode a permitted character"), |
---|
557 | digit); |
---|
558 | } |
---|
559 | } |
---|
560 | |
---|
561 | g_free (digit); |
---|
562 | |
---|
563 | /* Move to next state */ |
---|
564 | p = g_utf8_next_char (p); /* past semicolon */ |
---|
565 | start = p; |
---|
566 | state = USTATE_INSIDE_TEXT; |
---|
567 | } |
---|
568 | else |
---|
569 | { |
---|
570 | set_unescape_error (context, error, |
---|
571 | start, text_end, |
---|
572 | G_MARKUP_ERROR_PARSE, |
---|
573 | _("Empty character reference; " |
---|
574 | "should include a digit such as " |
---|
575 | "dž")); |
---|
576 | } |
---|
577 | } |
---|
578 | else |
---|
579 | { |
---|
580 | set_unescape_error (context, error, |
---|
581 | start, text_end, |
---|
582 | G_MARKUP_ERROR_PARSE, |
---|
583 | _("Character reference did not end with a " |
---|
584 | "semicolon; " |
---|
585 | "most likely you used an ampersand " |
---|
586 | "character without intending to start " |
---|
587 | "an entity - escape ampersand as &")); |
---|
588 | } |
---|
589 | } |
---|
590 | break; |
---|
591 | |
---|
592 | default: |
---|
593 | g_assert_not_reached (); |
---|
594 | break; |
---|
595 | } |
---|
596 | } |
---|
597 | |
---|
598 | if (context->state != STATE_ERROR) |
---|
599 | { |
---|
600 | switch (state) |
---|
601 | { |
---|
602 | case USTATE_INSIDE_TEXT: |
---|
603 | break; |
---|
604 | case USTATE_AFTER_AMPERSAND: |
---|
605 | case USTATE_INSIDE_ENTITY_NAME: |
---|
606 | set_unescape_error (context, error, |
---|
607 | NULL, NULL, |
---|
608 | G_MARKUP_ERROR_PARSE, |
---|
609 | _("Unfinished entity reference")); |
---|
610 | break; |
---|
611 | case USTATE_AFTER_CHARREF_HASH: |
---|
612 | set_unescape_error (context, error, |
---|
613 | NULL, NULL, |
---|
614 | G_MARKUP_ERROR_PARSE, |
---|
615 | _("Unfinished character reference")); |
---|
616 | break; |
---|
617 | } |
---|
618 | } |
---|
619 | |
---|
620 | if (context->state == STATE_ERROR) |
---|
621 | { |
---|
622 | g_string_free (str, TRUE); |
---|
623 | *unescaped = NULL; |
---|
624 | return FALSE; |
---|
625 | } |
---|
626 | else |
---|
627 | { |
---|
628 | *unescaped = g_string_free (str, FALSE); |
---|
629 | return TRUE; |
---|
630 | } |
---|
631 | |
---|
632 | #undef MAX_ENT_LEN |
---|
633 | } |
---|
634 | |
---|
635 | static gboolean |
---|
636 | advance_char (GMarkupParseContext *context) |
---|
637 | { |
---|
638 | |
---|
639 | context->iter = g_utf8_next_char (context->iter); |
---|
640 | context->char_number += 1; |
---|
641 | if (*context->iter == '\n') |
---|
642 | { |
---|
643 | context->line_number += 1; |
---|
644 | context->char_number = 1; |
---|
645 | } |
---|
646 | |
---|
647 | return context->iter != context->current_text_end; |
---|
648 | } |
---|
649 | |
---|
650 | static gboolean |
---|
651 | xml_isspace (char c) |
---|
652 | { |
---|
653 | return c == ' ' || c == '\t' || c == '\n' || c == '\r'; |
---|
654 | } |
---|
655 | |
---|
656 | static void |
---|
657 | skip_spaces (GMarkupParseContext *context) |
---|
658 | { |
---|
659 | do |
---|
660 | { |
---|
661 | if (!xml_isspace (*context->iter)) |
---|
662 | return; |
---|
663 | } |
---|
664 | while (advance_char (context)); |
---|
665 | } |
---|
666 | |
---|
667 | static void |
---|
668 | advance_to_name_end (GMarkupParseContext *context) |
---|
669 | { |
---|
670 | do |
---|
671 | { |
---|
672 | if (!is_name_char (g_utf8_get_char (context->iter))) |
---|
673 | return; |
---|
674 | } |
---|
675 | while (advance_char (context)); |
---|
676 | } |
---|
677 | |
---|
678 | static void |
---|
679 | add_to_partial (GMarkupParseContext *context, |
---|
680 | const gchar *text_start, |
---|
681 | const gchar *text_end) |
---|
682 | { |
---|
683 | if (context->partial_chunk == NULL) |
---|
684 | context->partial_chunk = g_string_new (""); |
---|
685 | |
---|
686 | if (text_start != text_end) |
---|
687 | g_string_append_len (context->partial_chunk, text_start, |
---|
688 | text_end - text_start); |
---|
689 | |
---|
690 | /* Invariant here that partial_chunk exists */ |
---|
691 | } |
---|
692 | |
---|
693 | static void |
---|
694 | truncate_partial (GMarkupParseContext *context) |
---|
695 | { |
---|
696 | if (context->partial_chunk != NULL) |
---|
697 | { |
---|
698 | context->partial_chunk = g_string_truncate (context->partial_chunk, 0); |
---|
699 | } |
---|
700 | } |
---|
701 | |
---|
702 | static const gchar* |
---|
703 | current_element (GMarkupParseContext *context) |
---|
704 | { |
---|
705 | return context->tag_stack->data; |
---|
706 | } |
---|
707 | |
---|
708 | static const gchar* |
---|
709 | current_attribute (GMarkupParseContext *context) |
---|
710 | { |
---|
711 | g_assert (context->cur_attr >= 0); |
---|
712 | return context->attr_names[context->cur_attr]; |
---|
713 | } |
---|
714 | |
---|
715 | static void |
---|
716 | find_current_text_end (GMarkupParseContext *context) |
---|
717 | { |
---|
718 | /* This function must be safe (non-segfaulting) on invalid UTF8 */ |
---|
719 | const gchar *end = context->current_text + context->current_text_len; |
---|
720 | const gchar *p; |
---|
721 | const gchar *next; |
---|
722 | |
---|
723 | g_assert (context->current_text_len > 0); |
---|
724 | |
---|
725 | p = context->current_text; |
---|
726 | next = g_utf8_find_next_char (p, end); |
---|
727 | |
---|
728 | while (next && *next) |
---|
729 | { |
---|
730 | if (p == next) |
---|
731 | next++; |
---|
732 | p = next; |
---|
733 | next = g_utf8_find_next_char (p, end); |
---|
734 | } |
---|
735 | |
---|
736 | /* p is now the start of the last character or character portion. */ |
---|
737 | g_assert (p != end); |
---|
738 | next = g_utf8_next_char (p); /* this only touches *p, nothing beyond */ |
---|
739 | |
---|
740 | if (next == end) |
---|
741 | { |
---|
742 | /* whole character */ |
---|
743 | context->current_text_end = end; |
---|
744 | } |
---|
745 | else |
---|
746 | { |
---|
747 | /* portion */ |
---|
748 | context->leftover_char_portion = g_string_new_len (p, end - p); |
---|
749 | context->current_text_len -= (end - p); |
---|
750 | context->current_text_end = p; |
---|
751 | } |
---|
752 | } |
---|
753 | |
---|
754 | static void |
---|
755 | add_attribute (GMarkupParseContext *context, char *name) |
---|
756 | { |
---|
757 | if (context->cur_attr + 2 >= context->alloc_attrs) |
---|
758 | { |
---|
759 | context->alloc_attrs += 5; /* silly magic number */ |
---|
760 | context->attr_names = g_realloc (context->attr_names, sizeof(char*)*context->alloc_attrs); |
---|
761 | context->attr_values = g_realloc (context->attr_values, sizeof(char*)*context->alloc_attrs); |
---|
762 | } |
---|
763 | context->cur_attr++; |
---|
764 | context->attr_names[context->cur_attr] = name; |
---|
765 | context->attr_values[context->cur_attr] = NULL; |
---|
766 | context->attr_names[context->cur_attr+1] = NULL; |
---|
767 | context->attr_values[context->cur_attr+1] = NULL; |
---|
768 | } |
---|
769 | |
---|
770 | /** |
---|
771 | * g_markup_parse_context_parse: |
---|
772 | * @context: a #GMarkupParseContext |
---|
773 | * @text: chunk of text to parse |
---|
774 | * @text_len: length of @text in bytes |
---|
775 | * @error: return location for a #GError |
---|
776 | * |
---|
777 | * Feed some data to the #GMarkupParseContext. The data need not |
---|
778 | * be valid UTF-8; an error will be signaled if it's invalid. |
---|
779 | * The data need not be an entire document; you can feed a document |
---|
780 | * into the parser incrementally, via multiple calls to this function. |
---|
781 | * Typically, as you receive data from a network connection or file, |
---|
782 | * you feed each received chunk of data into this function, aborting |
---|
783 | * the process if an error occurs. Once an error is reported, no further |
---|
784 | * data may be fed to the #GMarkupParseContext; all errors are fatal. |
---|
785 | * |
---|
786 | * Return value: %FALSE if an error occurred, %TRUE on success |
---|
787 | **/ |
---|
788 | gboolean |
---|
789 | g_markup_parse_context_parse (GMarkupParseContext *context, |
---|
790 | const gchar *text, |
---|
791 | gssize text_len, |
---|
792 | GError **error) |
---|
793 | { |
---|
794 | const gchar *first_invalid; |
---|
795 | |
---|
796 | g_return_val_if_fail (context != NULL, FALSE); |
---|
797 | g_return_val_if_fail (text != NULL, FALSE); |
---|
798 | g_return_val_if_fail (context->state != STATE_ERROR, FALSE); |
---|
799 | g_return_val_if_fail (!context->parsing, FALSE); |
---|
800 | |
---|
801 | if (text_len < 0) |
---|
802 | text_len = strlen (text); |
---|
803 | |
---|
804 | if (text_len == 0) |
---|
805 | return TRUE; |
---|
806 | |
---|
807 | context->parsing = TRUE; |
---|
808 | |
---|
809 | if (context->leftover_char_portion) |
---|
810 | { |
---|
811 | const gchar *first_char; |
---|
812 | |
---|
813 | if ((*text & 0xc0) != 0x80) |
---|
814 | first_char = text; |
---|
815 | else |
---|
816 | first_char = g_utf8_find_next_char (text, text + text_len); |
---|
817 | |
---|
818 | if (first_char) |
---|
819 | { |
---|
820 | /* leftover_char_portion was completed. Parse it. */ |
---|
821 | GString *portion = context->leftover_char_portion; |
---|
822 | |
---|
823 | g_string_append_len (context->leftover_char_portion, |
---|
824 | text, first_char - text); |
---|
825 | |
---|
826 | /* hacks to allow recursion */ |
---|
827 | context->parsing = FALSE; |
---|
828 | context->leftover_char_portion = NULL; |
---|
829 | |
---|
830 | if (!g_markup_parse_context_parse (context, |
---|
831 | portion->str, portion->len, |
---|
832 | error)) |
---|
833 | { |
---|
834 | g_assert (context->state == STATE_ERROR); |
---|
835 | } |
---|
836 | |
---|
837 | g_string_free (portion, TRUE); |
---|
838 | context->parsing = TRUE; |
---|
839 | |
---|
840 | /* Skip the fraction of char that was in this text */ |
---|
841 | text_len -= (first_char - text); |
---|
842 | text = first_char; |
---|
843 | } |
---|
844 | else |
---|
845 | { |
---|
846 | /* another little chunk of the leftover char; geez |
---|
847 | * someone is inefficient. |
---|
848 | */ |
---|
849 | g_string_append_len (context->leftover_char_portion, |
---|
850 | text, text_len); |
---|
851 | |
---|
852 | if (context->leftover_char_portion->len > 7) |
---|
853 | { |
---|
854 | /* The leftover char portion is too big to be |
---|
855 | * a UTF-8 character |
---|
856 | */ |
---|
857 | set_error (context, |
---|
858 | error, |
---|
859 | G_MARKUP_ERROR_BAD_UTF8, |
---|
860 | _("Invalid UTF-8 encoded text")); |
---|
861 | } |
---|
862 | |
---|
863 | goto finished; |
---|
864 | } |
---|
865 | } |
---|
866 | |
---|
867 | context->current_text = text; |
---|
868 | context->current_text_len = text_len; |
---|
869 | context->iter = context->current_text; |
---|
870 | context->start = context->iter; |
---|
871 | |
---|
872 | /* Nothing left after finishing the leftover char, or nothing |
---|
873 | * passed in to begin with. |
---|
874 | */ |
---|
875 | if (context->current_text_len == 0) |
---|
876 | goto finished; |
---|
877 | |
---|
878 | /* find_current_text_end () assumes the string starts at |
---|
879 | * a character start, so we need to validate at least |
---|
880 | * that much. It doesn't assume any following bytes |
---|
881 | * are valid. |
---|
882 | */ |
---|
883 | if ((*context->current_text & 0xc0) == 0x80) /* not a char start */ |
---|
884 | { |
---|
885 | set_error (context, |
---|
886 | error, |
---|
887 | G_MARKUP_ERROR_BAD_UTF8, |
---|
888 | _("Invalid UTF-8 encoded text")); |
---|
889 | goto finished; |
---|
890 | } |
---|
891 | |
---|
892 | /* Initialize context->current_text_end, possibly adjusting |
---|
893 | * current_text_len, and add any leftover char portion |
---|
894 | */ |
---|
895 | find_current_text_end (context); |
---|
896 | |
---|
897 | /* Validate UTF8 (must be done after we find the end, since |
---|
898 | * we could have a trailing incomplete char) |
---|
899 | */ |
---|
900 | if (!g_utf8_validate (context->current_text, |
---|
901 | context->current_text_len, |
---|
902 | &first_invalid)) |
---|
903 | { |
---|
904 | gint newlines = 0; |
---|
905 | const gchar *p; |
---|
906 | p = context->current_text; |
---|
907 | while (p != context->current_text_end) |
---|
908 | { |
---|
909 | if (*p == '\n') |
---|
910 | ++newlines; |
---|
911 | ++p; |
---|
912 | } |
---|
913 | |
---|
914 | context->line_number += newlines; |
---|
915 | |
---|
916 | set_error (context, |
---|
917 | error, |
---|
918 | G_MARKUP_ERROR_BAD_UTF8, |
---|
919 | _("Invalid UTF-8 encoded text")); |
---|
920 | goto finished; |
---|
921 | } |
---|
922 | |
---|
923 | while (context->iter != context->current_text_end) |
---|
924 | { |
---|
925 | switch (context->state) |
---|
926 | { |
---|
927 | case STATE_START: |
---|
928 | /* Possible next state: AFTER_OPEN_ANGLE */ |
---|
929 | |
---|
930 | g_assert (context->tag_stack == NULL); |
---|
931 | |
---|
932 | /* whitespace is ignored outside of any elements */ |
---|
933 | skip_spaces (context); |
---|
934 | |
---|
935 | if (context->iter != context->current_text_end) |
---|
936 | { |
---|
937 | if (*context->iter == '<') |
---|
938 | { |
---|
939 | /* Move after the open angle */ |
---|
940 | advance_char (context); |
---|
941 | |
---|
942 | context->state = STATE_AFTER_OPEN_ANGLE; |
---|
943 | |
---|
944 | /* this could start a passthrough */ |
---|
945 | context->start = context->iter; |
---|
946 | |
---|
947 | /* document is now non-empty */ |
---|
948 | context->document_empty = FALSE; |
---|
949 | } |
---|
950 | else |
---|
951 | { |
---|
952 | set_error (context, |
---|
953 | error, |
---|
954 | G_MARKUP_ERROR_PARSE, |
---|
955 | _("Document must begin with an element (e.g. <book>)")); |
---|
956 | } |
---|
957 | } |
---|
958 | break; |
---|
959 | |
---|
960 | case STATE_AFTER_OPEN_ANGLE: |
---|
961 | /* Possible next states: INSIDE_OPEN_TAG_NAME, |
---|
962 | * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH |
---|
963 | */ |
---|
964 | if (*context->iter == '?' || |
---|
965 | *context->iter == '!') |
---|
966 | { |
---|
967 | /* include < in the passthrough */ |
---|
968 | const gchar *openangle = "<"; |
---|
969 | add_to_partial (context, openangle, openangle + 1); |
---|
970 | context->start = context->iter; |
---|
971 | context->balance = 1; |
---|
972 | context->state = STATE_INSIDE_PASSTHROUGH; |
---|
973 | } |
---|
974 | else if (*context->iter == '/') |
---|
975 | { |
---|
976 | /* move after it */ |
---|
977 | advance_char (context); |
---|
978 | |
---|
979 | context->state = STATE_AFTER_CLOSE_TAG_SLASH; |
---|
980 | } |
---|
981 | else if (is_name_start_char (g_utf8_get_char (context->iter))) |
---|
982 | { |
---|
983 | context->state = STATE_INSIDE_OPEN_TAG_NAME; |
---|
984 | |
---|
985 | /* start of tag name */ |
---|
986 | context->start = context->iter; |
---|
987 | } |
---|
988 | else |
---|
989 | { |
---|
990 | gchar buf[7]; |
---|
991 | set_error (context, |
---|
992 | error, |
---|
993 | G_MARKUP_ERROR_PARSE, |
---|
994 | _("'%s' is not a valid character following " |
---|
995 | "a '<' character; it may not begin an " |
---|
996 | "element name"), |
---|
997 | utf8_str (context->iter, buf)); |
---|
998 | } |
---|
999 | break; |
---|
1000 | |
---|
1001 | /* The AFTER_CLOSE_ANGLE state is actually sort of |
---|
1002 | * broken, because it doesn't correspond to a range |
---|
1003 | * of characters in the input stream as the others do, |
---|
1004 | * and thus makes things harder to conceptualize |
---|
1005 | */ |
---|
1006 | case STATE_AFTER_CLOSE_ANGLE: |
---|
1007 | /* Possible next states: INSIDE_TEXT, STATE_START */ |
---|
1008 | if (context->tag_stack == NULL) |
---|
1009 | { |
---|
1010 | context->start = NULL; |
---|
1011 | context->state = STATE_START; |
---|
1012 | } |
---|
1013 | else |
---|
1014 | { |
---|
1015 | context->start = context->iter; |
---|
1016 | context->state = STATE_INSIDE_TEXT; |
---|
1017 | } |
---|
1018 | break; |
---|
1019 | |
---|
1020 | case STATE_AFTER_ELISION_SLASH: |
---|
1021 | /* Possible next state: AFTER_CLOSE_ANGLE */ |
---|
1022 | |
---|
1023 | { |
---|
1024 | /* We need to pop the tag stack and call the end_element |
---|
1025 | * function, since this is the close tag |
---|
1026 | */ |
---|
1027 | GError *tmp_error = NULL; |
---|
1028 | |
---|
1029 | g_assert (context->tag_stack != NULL); |
---|
1030 | |
---|
1031 | tmp_error = NULL; |
---|
1032 | if (context->parser->end_element) |
---|
1033 | (* context->parser->end_element) (context, |
---|
1034 | context->tag_stack->data, |
---|
1035 | context->user_data, |
---|
1036 | &tmp_error); |
---|
1037 | |
---|
1038 | if (tmp_error) |
---|
1039 | { |
---|
1040 | mark_error (context, tmp_error); |
---|
1041 | g_propagate_error (error, tmp_error); |
---|
1042 | } |
---|
1043 | else |
---|
1044 | { |
---|
1045 | if (*context->iter == '>') |
---|
1046 | { |
---|
1047 | /* move after the close angle */ |
---|
1048 | advance_char (context); |
---|
1049 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1050 | } |
---|
1051 | else |
---|
1052 | { |
---|
1053 | gchar buf[7]; |
---|
1054 | set_error (context, |
---|
1055 | error, |
---|
1056 | G_MARKUP_ERROR_PARSE, |
---|
1057 | _("Odd character '%s', expected a '>' character " |
---|
1058 | "to end the start tag of element '%s'"), |
---|
1059 | utf8_str (context->iter, buf), |
---|
1060 | current_element (context)); |
---|
1061 | } |
---|
1062 | } |
---|
1063 | |
---|
1064 | g_free (context->tag_stack->data); |
---|
1065 | context->tag_stack = g_slist_delete_link (context->tag_stack, |
---|
1066 | context->tag_stack); |
---|
1067 | } |
---|
1068 | break; |
---|
1069 | |
---|
1070 | case STATE_INSIDE_OPEN_TAG_NAME: |
---|
1071 | /* Possible next states: BETWEEN_ATTRIBUTES */ |
---|
1072 | |
---|
1073 | /* if there's a partial chunk then it's the first part of the |
---|
1074 | * tag name. If there's a context->start then it's the start |
---|
1075 | * of the tag name in current_text, the partial chunk goes |
---|
1076 | * before that start though. |
---|
1077 | */ |
---|
1078 | advance_to_name_end (context); |
---|
1079 | |
---|
1080 | if (context->iter == context->current_text_end) |
---|
1081 | { |
---|
1082 | /* The name hasn't necessarily ended. Merge with |
---|
1083 | * partial chunk, leave state unchanged. |
---|
1084 | */ |
---|
1085 | add_to_partial (context, context->start, context->iter); |
---|
1086 | } |
---|
1087 | else |
---|
1088 | { |
---|
1089 | /* The name has ended. Combine it with the partial chunk |
---|
1090 | * if any; push it on the stack; enter next state. |
---|
1091 | */ |
---|
1092 | add_to_partial (context, context->start, context->iter); |
---|
1093 | context->tag_stack = |
---|
1094 | g_slist_prepend (context->tag_stack, |
---|
1095 | g_string_free (context->partial_chunk, |
---|
1096 | FALSE)); |
---|
1097 | |
---|
1098 | context->partial_chunk = NULL; |
---|
1099 | |
---|
1100 | context->state = STATE_BETWEEN_ATTRIBUTES; |
---|
1101 | context->start = NULL; |
---|
1102 | } |
---|
1103 | break; |
---|
1104 | |
---|
1105 | case STATE_INSIDE_ATTRIBUTE_NAME: |
---|
1106 | /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */ |
---|
1107 | |
---|
1108 | /* read the full name, if we enter the equals sign state |
---|
1109 | * then add the attribute to the list (without the value), |
---|
1110 | * otherwise store a partial chunk to be prepended later. |
---|
1111 | */ |
---|
1112 | advance_to_name_end (context); |
---|
1113 | |
---|
1114 | if (context->iter == context->current_text_end) |
---|
1115 | { |
---|
1116 | /* The name hasn't necessarily ended. Merge with |
---|
1117 | * partial chunk, leave state unchanged. |
---|
1118 | */ |
---|
1119 | add_to_partial (context, context->start, context->iter); |
---|
1120 | } |
---|
1121 | else |
---|
1122 | { |
---|
1123 | /* The name has ended. Combine it with the partial chunk |
---|
1124 | * if any; push it on the stack; enter next state. |
---|
1125 | */ |
---|
1126 | add_to_partial (context, context->start, context->iter); |
---|
1127 | |
---|
1128 | add_attribute (context, g_string_free (context->partial_chunk, FALSE)); |
---|
1129 | |
---|
1130 | context->partial_chunk = NULL; |
---|
1131 | context->start = NULL; |
---|
1132 | |
---|
1133 | if (*context->iter == '=') |
---|
1134 | { |
---|
1135 | advance_char (context); |
---|
1136 | context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN; |
---|
1137 | } |
---|
1138 | else |
---|
1139 | { |
---|
1140 | gchar buf[7]; |
---|
1141 | set_error (context, |
---|
1142 | error, |
---|
1143 | G_MARKUP_ERROR_PARSE, |
---|
1144 | _("Odd character '%s', expected a '=' after " |
---|
1145 | "attribute name '%s' of element '%s'"), |
---|
1146 | utf8_str (context->iter, buf), |
---|
1147 | current_attribute (context), |
---|
1148 | current_element (context)); |
---|
1149 | |
---|
1150 | } |
---|
1151 | } |
---|
1152 | break; |
---|
1153 | |
---|
1154 | case STATE_BETWEEN_ATTRIBUTES: |
---|
1155 | /* Possible next states: AFTER_CLOSE_ANGLE, |
---|
1156 | * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME |
---|
1157 | */ |
---|
1158 | skip_spaces (context); |
---|
1159 | |
---|
1160 | if (context->iter != context->current_text_end) |
---|
1161 | { |
---|
1162 | if (*context->iter == '/') |
---|
1163 | { |
---|
1164 | advance_char (context); |
---|
1165 | context->state = STATE_AFTER_ELISION_SLASH; |
---|
1166 | } |
---|
1167 | else if (*context->iter == '>') |
---|
1168 | { |
---|
1169 | |
---|
1170 | advance_char (context); |
---|
1171 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1172 | } |
---|
1173 | else if (is_name_start_char (g_utf8_get_char (context->iter))) |
---|
1174 | { |
---|
1175 | context->state = STATE_INSIDE_ATTRIBUTE_NAME; |
---|
1176 | /* start of attribute name */ |
---|
1177 | context->start = context->iter; |
---|
1178 | } |
---|
1179 | else |
---|
1180 | { |
---|
1181 | gchar buf[7]; |
---|
1182 | set_error (context, |
---|
1183 | error, |
---|
1184 | G_MARKUP_ERROR_PARSE, |
---|
1185 | _("Odd character '%s', expected a '>' or '/' " |
---|
1186 | "character to end the start tag of " |
---|
1187 | "element '%s', or optionally an attribute; " |
---|
1188 | "perhaps you used an invalid character in " |
---|
1189 | "an attribute name"), |
---|
1190 | utf8_str (context->iter, buf), |
---|
1191 | current_element (context)); |
---|
1192 | } |
---|
1193 | |
---|
1194 | /* If we're done with attributes, invoke |
---|
1195 | * the start_element callback |
---|
1196 | */ |
---|
1197 | if (context->state == STATE_AFTER_ELISION_SLASH || |
---|
1198 | context->state == STATE_AFTER_CLOSE_ANGLE) |
---|
1199 | { |
---|
1200 | const gchar *start_name; |
---|
1201 | /* Ugly, but the current code expects an empty array instead of NULL */ |
---|
1202 | const gchar *empty = NULL; |
---|
1203 | const gchar **attr_names = ∅ |
---|
1204 | const gchar **attr_values = ∅ |
---|
1205 | GError *tmp_error; |
---|
1206 | |
---|
1207 | /* Call user callback for element start */ |
---|
1208 | start_name = current_element (context); |
---|
1209 | |
---|
1210 | if (context->cur_attr >= 0) |
---|
1211 | { |
---|
1212 | attr_names = (const gchar**)context->attr_names; |
---|
1213 | attr_values = (const gchar**)context->attr_values; |
---|
1214 | } |
---|
1215 | |
---|
1216 | tmp_error = NULL; |
---|
1217 | if (context->parser->start_element) |
---|
1218 | (* context->parser->start_element) (context, |
---|
1219 | start_name, |
---|
1220 | (const gchar **)attr_names, |
---|
1221 | (const gchar **)attr_values, |
---|
1222 | context->user_data, |
---|
1223 | &tmp_error); |
---|
1224 | |
---|
1225 | /* Go ahead and free the attributes. */ |
---|
1226 | for (; context->cur_attr >= 0; context->cur_attr--) |
---|
1227 | { |
---|
1228 | int pos = context->cur_attr; |
---|
1229 | g_free (context->attr_names[pos]); |
---|
1230 | g_free (context->attr_values[pos]); |
---|
1231 | context->attr_names[pos] = context->attr_values[pos] = NULL; |
---|
1232 | } |
---|
1233 | g_assert (context->cur_attr == -1); |
---|
1234 | g_assert (context->attr_names == NULL || |
---|
1235 | context->attr_names[0] == NULL); |
---|
1236 | g_assert (context->attr_values == NULL || |
---|
1237 | context->attr_values[0] == NULL); |
---|
1238 | |
---|
1239 | if (tmp_error != NULL) |
---|
1240 | { |
---|
1241 | mark_error (context, tmp_error); |
---|
1242 | g_propagate_error (error, tmp_error); |
---|
1243 | } |
---|
1244 | } |
---|
1245 | } |
---|
1246 | break; |
---|
1247 | |
---|
1248 | case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: |
---|
1249 | /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */ |
---|
1250 | if (*context->iter == '"') |
---|
1251 | { |
---|
1252 | advance_char (context); |
---|
1253 | context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ; |
---|
1254 | context->start = context->iter; |
---|
1255 | } |
---|
1256 | else if (*context->iter == '\'') |
---|
1257 | { |
---|
1258 | advance_char (context); |
---|
1259 | context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ; |
---|
1260 | context->start = context->iter; |
---|
1261 | } |
---|
1262 | else |
---|
1263 | { |
---|
1264 | gchar buf[7]; |
---|
1265 | set_error (context, |
---|
1266 | error, |
---|
1267 | G_MARKUP_ERROR_PARSE, |
---|
1268 | _("Odd character '%s', expected an open quote mark " |
---|
1269 | "after the equals sign when giving value for " |
---|
1270 | "attribute '%s' of element '%s'"), |
---|
1271 | utf8_str (context->iter, buf), |
---|
1272 | current_attribute (context), |
---|
1273 | current_element (context)); |
---|
1274 | } |
---|
1275 | break; |
---|
1276 | |
---|
1277 | case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: |
---|
1278 | case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: |
---|
1279 | /* Possible next states: BETWEEN_ATTRIBUTES */ |
---|
1280 | { |
---|
1281 | gchar delim; |
---|
1282 | |
---|
1283 | if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ) |
---|
1284 | { |
---|
1285 | delim = '\''; |
---|
1286 | } |
---|
1287 | else |
---|
1288 | { |
---|
1289 | delim = '"'; |
---|
1290 | } |
---|
1291 | |
---|
1292 | do |
---|
1293 | { |
---|
1294 | if (*context->iter == delim) |
---|
1295 | break; |
---|
1296 | } |
---|
1297 | while (advance_char (context)); |
---|
1298 | } |
---|
1299 | if (context->iter == context->current_text_end) |
---|
1300 | { |
---|
1301 | /* The value hasn't necessarily ended. Merge with |
---|
1302 | * partial chunk, leave state unchanged. |
---|
1303 | */ |
---|
1304 | add_to_partial (context, context->start, context->iter); |
---|
1305 | } |
---|
1306 | else |
---|
1307 | { |
---|
1308 | /* The value has ended at the quote mark. Combine it |
---|
1309 | * with the partial chunk if any; set it for the current |
---|
1310 | * attribute. |
---|
1311 | */ |
---|
1312 | add_to_partial (context, context->start, context->iter); |
---|
1313 | |
---|
1314 | g_assert (context->cur_attr >= 0); |
---|
1315 | |
---|
1316 | if (unescape_text (context, |
---|
1317 | context->partial_chunk->str, |
---|
1318 | context->partial_chunk->str + |
---|
1319 | context->partial_chunk->len, |
---|
1320 | &context->attr_values[context->cur_attr], |
---|
1321 | error)) |
---|
1322 | { |
---|
1323 | /* success, advance past quote and set state. */ |
---|
1324 | advance_char (context); |
---|
1325 | context->state = STATE_BETWEEN_ATTRIBUTES; |
---|
1326 | context->start = NULL; |
---|
1327 | } |
---|
1328 | |
---|
1329 | truncate_partial (context); |
---|
1330 | } |
---|
1331 | break; |
---|
1332 | |
---|
1333 | case STATE_INSIDE_TEXT: |
---|
1334 | /* Possible next states: AFTER_OPEN_ANGLE */ |
---|
1335 | do |
---|
1336 | { |
---|
1337 | if (*context->iter == '<') |
---|
1338 | break; |
---|
1339 | } |
---|
1340 | while (advance_char (context)); |
---|
1341 | |
---|
1342 | /* The text hasn't necessarily ended. Merge with |
---|
1343 | * partial chunk, leave state unchanged. |
---|
1344 | */ |
---|
1345 | |
---|
1346 | add_to_partial (context, context->start, context->iter); |
---|
1347 | |
---|
1348 | if (context->iter != context->current_text_end) |
---|
1349 | { |
---|
1350 | gchar *unescaped = NULL; |
---|
1351 | |
---|
1352 | /* The text has ended at the open angle. Call the text |
---|
1353 | * callback. |
---|
1354 | */ |
---|
1355 | |
---|
1356 | if (unescape_text (context, |
---|
1357 | context->partial_chunk->str, |
---|
1358 | context->partial_chunk->str + |
---|
1359 | context->partial_chunk->len, |
---|
1360 | &unescaped, |
---|
1361 | error)) |
---|
1362 | { |
---|
1363 | GError *tmp_error = NULL; |
---|
1364 | |
---|
1365 | if (context->parser->text) |
---|
1366 | (*context->parser->text) (context, |
---|
1367 | unescaped, |
---|
1368 | strlen (unescaped), |
---|
1369 | context->user_data, |
---|
1370 | &tmp_error); |
---|
1371 | |
---|
1372 | g_free (unescaped); |
---|
1373 | |
---|
1374 | if (tmp_error == NULL) |
---|
1375 | { |
---|
1376 | /* advance past open angle and set state. */ |
---|
1377 | advance_char (context); |
---|
1378 | context->state = STATE_AFTER_OPEN_ANGLE; |
---|
1379 | /* could begin a passthrough */ |
---|
1380 | context->start = context->iter; |
---|
1381 | } |
---|
1382 | else |
---|
1383 | { |
---|
1384 | mark_error (context, tmp_error); |
---|
1385 | g_propagate_error (error, tmp_error); |
---|
1386 | } |
---|
1387 | } |
---|
1388 | |
---|
1389 | truncate_partial (context); |
---|
1390 | } |
---|
1391 | break; |
---|
1392 | |
---|
1393 | case STATE_AFTER_CLOSE_TAG_SLASH: |
---|
1394 | /* Possible next state: INSIDE_CLOSE_TAG_NAME */ |
---|
1395 | if (is_name_start_char (g_utf8_get_char (context->iter))) |
---|
1396 | { |
---|
1397 | context->state = STATE_INSIDE_CLOSE_TAG_NAME; |
---|
1398 | |
---|
1399 | /* start of tag name */ |
---|
1400 | context->start = context->iter; |
---|
1401 | } |
---|
1402 | else |
---|
1403 | { |
---|
1404 | gchar buf[7]; |
---|
1405 | set_error (context, |
---|
1406 | error, |
---|
1407 | G_MARKUP_ERROR_PARSE, |
---|
1408 | _("'%s' is not a valid character following " |
---|
1409 | "the characters '</'; '%s' may not begin an " |
---|
1410 | "element name"), |
---|
1411 | utf8_str (context->iter, buf), |
---|
1412 | utf8_str (context->iter, buf)); |
---|
1413 | } |
---|
1414 | break; |
---|
1415 | |
---|
1416 | case STATE_INSIDE_CLOSE_TAG_NAME: |
---|
1417 | /* Possible next state: AFTER_CLOSE_ANGLE */ |
---|
1418 | advance_to_name_end (context); |
---|
1419 | |
---|
1420 | if (context->iter == context->current_text_end) |
---|
1421 | { |
---|
1422 | /* The name hasn't necessarily ended. Merge with |
---|
1423 | * partial chunk, leave state unchanged. |
---|
1424 | */ |
---|
1425 | add_to_partial (context, context->start, context->iter); |
---|
1426 | } |
---|
1427 | else |
---|
1428 | { |
---|
1429 | /* The name has ended. Combine it with the partial chunk |
---|
1430 | * if any; check that it matches stack top and pop |
---|
1431 | * stack; invoke proper callback; enter next state. |
---|
1432 | */ |
---|
1433 | gchar *close_name; |
---|
1434 | |
---|
1435 | add_to_partial (context, context->start, context->iter); |
---|
1436 | |
---|
1437 | close_name = g_string_free (context->partial_chunk, FALSE); |
---|
1438 | context->partial_chunk = NULL; |
---|
1439 | |
---|
1440 | if (*context->iter != '>') |
---|
1441 | { |
---|
1442 | gchar buf[7]; |
---|
1443 | set_error (context, |
---|
1444 | error, |
---|
1445 | G_MARKUP_ERROR_PARSE, |
---|
1446 | _("'%s' is not a valid character following " |
---|
1447 | "the close element name '%s'; the allowed " |
---|
1448 | "character is '>'"), |
---|
1449 | utf8_str (context->iter, buf), |
---|
1450 | close_name); |
---|
1451 | } |
---|
1452 | else if (context->tag_stack == NULL) |
---|
1453 | { |
---|
1454 | set_error (context, |
---|
1455 | error, |
---|
1456 | G_MARKUP_ERROR_PARSE, |
---|
1457 | _("Element '%s' was closed, no element " |
---|
1458 | "is currently open"), |
---|
1459 | close_name); |
---|
1460 | } |
---|
1461 | else if (strcmp (close_name, current_element (context)) != 0) |
---|
1462 | { |
---|
1463 | set_error (context, |
---|
1464 | error, |
---|
1465 | G_MARKUP_ERROR_PARSE, |
---|
1466 | _("Element '%s' was closed, but the currently " |
---|
1467 | "open element is '%s'"), |
---|
1468 | close_name, |
---|
1469 | current_element (context)); |
---|
1470 | } |
---|
1471 | else |
---|
1472 | { |
---|
1473 | GError *tmp_error; |
---|
1474 | advance_char (context); |
---|
1475 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1476 | context->start = NULL; |
---|
1477 | |
---|
1478 | /* call the end_element callback */ |
---|
1479 | tmp_error = NULL; |
---|
1480 | if (context->parser->end_element) |
---|
1481 | (* context->parser->end_element) (context, |
---|
1482 | close_name, |
---|
1483 | context->user_data, |
---|
1484 | &tmp_error); |
---|
1485 | |
---|
1486 | |
---|
1487 | /* Pop the tag stack */ |
---|
1488 | g_free (context->tag_stack->data); |
---|
1489 | context->tag_stack = g_slist_delete_link (context->tag_stack, |
---|
1490 | context->tag_stack); |
---|
1491 | |
---|
1492 | if (tmp_error) |
---|
1493 | { |
---|
1494 | mark_error (context, tmp_error); |
---|
1495 | g_propagate_error (error, tmp_error); |
---|
1496 | } |
---|
1497 | } |
---|
1498 | |
---|
1499 | g_free (close_name); |
---|
1500 | } |
---|
1501 | break; |
---|
1502 | |
---|
1503 | case STATE_INSIDE_PASSTHROUGH: |
---|
1504 | /* Possible next state: AFTER_CLOSE_ANGLE */ |
---|
1505 | do |
---|
1506 | { |
---|
1507 | if (*context->iter == '<') |
---|
1508 | context->balance++; |
---|
1509 | if (*context->iter == '>') |
---|
1510 | { |
---|
1511 | context->balance--; |
---|
1512 | add_to_partial (context, context->start, context->iter); |
---|
1513 | context->start = context->iter; |
---|
1514 | if ((g_str_has_prefix (context->partial_chunk->str, "<?") |
---|
1515 | && g_str_has_suffix (context->partial_chunk->str, "?")) || |
---|
1516 | (g_str_has_prefix (context->partial_chunk->str, "<!--") |
---|
1517 | && g_str_has_suffix (context->partial_chunk->str, "--")) || |
---|
1518 | (g_str_has_prefix (context->partial_chunk->str, "<![CDATA[") |
---|
1519 | && g_str_has_suffix (context->partial_chunk->str, "]]")) || |
---|
1520 | (g_str_has_prefix (context->partial_chunk->str, "<!DOCTYPE") |
---|
1521 | && context->balance == 0)) |
---|
1522 | break; |
---|
1523 | } |
---|
1524 | } |
---|
1525 | while (advance_char (context)); |
---|
1526 | |
---|
1527 | if (context->iter == context->current_text_end) |
---|
1528 | { |
---|
1529 | /* The passthrough hasn't necessarily ended. Merge with |
---|
1530 | * partial chunk, leave state unchanged. |
---|
1531 | */ |
---|
1532 | add_to_partial (context, context->start, context->iter); |
---|
1533 | } |
---|
1534 | else |
---|
1535 | { |
---|
1536 | /* The passthrough has ended at the close angle. Combine |
---|
1537 | * it with the partial chunk if any. Call the passthrough |
---|
1538 | * callback. Note that the open/close angles are |
---|
1539 | * included in the text of the passthrough. |
---|
1540 | */ |
---|
1541 | GError *tmp_error = NULL; |
---|
1542 | |
---|
1543 | advance_char (context); /* advance past close angle */ |
---|
1544 | add_to_partial (context, context->start, context->iter); |
---|
1545 | |
---|
1546 | if (context->parser->passthrough) |
---|
1547 | (*context->parser->passthrough) (context, |
---|
1548 | context->partial_chunk->str, |
---|
1549 | context->partial_chunk->len, |
---|
1550 | context->user_data, |
---|
1551 | &tmp_error); |
---|
1552 | |
---|
1553 | truncate_partial (context); |
---|
1554 | |
---|
1555 | if (tmp_error == NULL) |
---|
1556 | { |
---|
1557 | context->state = STATE_AFTER_CLOSE_ANGLE; |
---|
1558 | context->start = context->iter; /* could begin text */ |
---|
1559 | } |
---|
1560 | else |
---|
1561 | { |
---|
1562 | mark_error (context, tmp_error); |
---|
1563 | g_propagate_error (error, tmp_error); |
---|
1564 | } |
---|
1565 | } |
---|
1566 | break; |
---|
1567 | |
---|
1568 | case STATE_ERROR: |
---|
1569 | goto finished; |
---|
1570 | break; |
---|
1571 | |
---|
1572 | default: |
---|
1573 | g_assert_not_reached (); |
---|
1574 | break; |
---|
1575 | } |
---|
1576 | } |
---|
1577 | |
---|
1578 | finished: |
---|
1579 | context->parsing = FALSE; |
---|
1580 | |
---|
1581 | return context->state != STATE_ERROR; |
---|
1582 | } |
---|
1583 | |
---|
1584 | /** |
---|
1585 | * g_markup_parse_context_end_parse: |
---|
1586 | * @context: a #GMarkupParseContext |
---|
1587 | * @error: return location for a #GError |
---|
1588 | * |
---|
1589 | * Signals to the #GMarkupParseContext that all data has been |
---|
1590 | * fed into the parse context with g_markup_parse_context_parse(). |
---|
1591 | * This function reports an error if the document isn't complete, |
---|
1592 | * for example if elements are still open. |
---|
1593 | * |
---|
1594 | * Return value: %TRUE on success, %FALSE if an error was set |
---|
1595 | **/ |
---|
1596 | gboolean |
---|
1597 | g_markup_parse_context_end_parse (GMarkupParseContext *context, |
---|
1598 | GError **error) |
---|
1599 | { |
---|
1600 | g_return_val_if_fail (context != NULL, FALSE); |
---|
1601 | g_return_val_if_fail (!context->parsing, FALSE); |
---|
1602 | g_return_val_if_fail (context->state != STATE_ERROR, FALSE); |
---|
1603 | |
---|
1604 | if (context->partial_chunk != NULL) |
---|
1605 | { |
---|
1606 | g_string_free (context->partial_chunk, TRUE); |
---|
1607 | context->partial_chunk = NULL; |
---|
1608 | } |
---|
1609 | |
---|
1610 | if (context->document_empty) |
---|
1611 | { |
---|
1612 | set_error (context, error, G_MARKUP_ERROR_EMPTY, |
---|
1613 | _("Document was empty or contained only whitespace")); |
---|
1614 | return FALSE; |
---|
1615 | } |
---|
1616 | |
---|
1617 | context->parsing = TRUE; |
---|
1618 | |
---|
1619 | switch (context->state) |
---|
1620 | { |
---|
1621 | case STATE_START: |
---|
1622 | /* Nothing to do */ |
---|
1623 | break; |
---|
1624 | |
---|
1625 | case STATE_AFTER_OPEN_ANGLE: |
---|
1626 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1627 | _("Document ended unexpectedly just after an open angle bracket '<'")); |
---|
1628 | break; |
---|
1629 | |
---|
1630 | case STATE_AFTER_CLOSE_ANGLE: |
---|
1631 | if (context->tag_stack != NULL) |
---|
1632 | { |
---|
1633 | /* Error message the same as for INSIDE_TEXT */ |
---|
1634 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1635 | _("Document ended unexpectedly with elements still open - " |
---|
1636 | "'%s' was the last element opened"), |
---|
1637 | current_element (context)); |
---|
1638 | } |
---|
1639 | break; |
---|
1640 | |
---|
1641 | case STATE_AFTER_ELISION_SLASH: |
---|
1642 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1643 | _("Document ended unexpectedly, expected to see a close angle " |
---|
1644 | "bracket ending the tag <%s/>"), current_element (context)); |
---|
1645 | break; |
---|
1646 | |
---|
1647 | case STATE_INSIDE_OPEN_TAG_NAME: |
---|
1648 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1649 | _("Document ended unexpectedly inside an element name")); |
---|
1650 | break; |
---|
1651 | |
---|
1652 | case STATE_INSIDE_ATTRIBUTE_NAME: |
---|
1653 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1654 | _("Document ended unexpectedly inside an attribute name")); |
---|
1655 | break; |
---|
1656 | |
---|
1657 | case STATE_BETWEEN_ATTRIBUTES: |
---|
1658 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1659 | _("Document ended unexpectedly inside an element-opening " |
---|
1660 | "tag.")); |
---|
1661 | break; |
---|
1662 | |
---|
1663 | case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN: |
---|
1664 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1665 | _("Document ended unexpectedly after the equals sign " |
---|
1666 | "following an attribute name; no attribute value")); |
---|
1667 | break; |
---|
1668 | |
---|
1669 | case STATE_INSIDE_ATTRIBUTE_VALUE_SQ: |
---|
1670 | case STATE_INSIDE_ATTRIBUTE_VALUE_DQ: |
---|
1671 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1672 | _("Document ended unexpectedly while inside an attribute " |
---|
1673 | "value")); |
---|
1674 | break; |
---|
1675 | |
---|
1676 | case STATE_INSIDE_TEXT: |
---|
1677 | g_assert (context->tag_stack != NULL); |
---|
1678 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1679 | _("Document ended unexpectedly with elements still open - " |
---|
1680 | "'%s' was the last element opened"), |
---|
1681 | current_element (context)); |
---|
1682 | break; |
---|
1683 | |
---|
1684 | case STATE_AFTER_CLOSE_TAG_SLASH: |
---|
1685 | case STATE_INSIDE_CLOSE_TAG_NAME: |
---|
1686 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1687 | _("Document ended unexpectedly inside the close tag for " |
---|
1688 | "element '%s'"), current_element); |
---|
1689 | break; |
---|
1690 | |
---|
1691 | case STATE_INSIDE_PASSTHROUGH: |
---|
1692 | set_error (context, error, G_MARKUP_ERROR_PARSE, |
---|
1693 | _("Document ended unexpectedly inside a comment or " |
---|
1694 | "processing instruction")); |
---|
1695 | break; |
---|
1696 | |
---|
1697 | case STATE_ERROR: |
---|
1698 | default: |
---|
1699 | g_assert_not_reached (); |
---|
1700 | break; |
---|
1701 | } |
---|
1702 | |
---|
1703 | context->parsing = FALSE; |
---|
1704 | |
---|
1705 | return context->state != STATE_ERROR; |
---|
1706 | } |
---|
1707 | |
---|
1708 | /** |
---|
1709 | * g_markup_parse_context_get_element: |
---|
1710 | * @context: a #GMarkupParseContext |
---|
1711 | * @returns: the name of the currently open element, or %NULL |
---|
1712 | * |
---|
1713 | * Retrieves the name of the currently open element. |
---|
1714 | * |
---|
1715 | * Since: 2.2 |
---|
1716 | **/ |
---|
1717 | G_CONST_RETURN gchar * |
---|
1718 | g_markup_parse_context_get_element (GMarkupParseContext *context) |
---|
1719 | { |
---|
1720 | g_return_val_if_fail (context != NULL, NULL); |
---|
1721 | |
---|
1722 | if (context->tag_stack == NULL) |
---|
1723 | return NULL; |
---|
1724 | else |
---|
1725 | return current_element (context); |
---|
1726 | } |
---|
1727 | |
---|
1728 | /** |
---|
1729 | * g_markup_parse_context_get_position: |
---|
1730 | * @context: a #GMarkupParseContext |
---|
1731 | * @line_number: return location for a line number, or %NULL |
---|
1732 | * @char_number: return location for a char-on-line number, or %NULL |
---|
1733 | * |
---|
1734 | * Retrieves the current line number and the number of the character on |
---|
1735 | * that line. Intended for use in error messages; there are no strict |
---|
1736 | * semantics for what constitutes the "current" line number other than |
---|
1737 | * "the best number we could come up with for error messages." |
---|
1738 | * |
---|
1739 | **/ |
---|
1740 | void |
---|
1741 | g_markup_parse_context_get_position (GMarkupParseContext *context, |
---|
1742 | gint *line_number, |
---|
1743 | gint *char_number) |
---|
1744 | { |
---|
1745 | g_return_if_fail (context != NULL); |
---|
1746 | |
---|
1747 | if (line_number) |
---|
1748 | *line_number = context->line_number; |
---|
1749 | |
---|
1750 | if (char_number) |
---|
1751 | *char_number = context->char_number; |
---|
1752 | } |
---|
1753 | |
---|
1754 | static void |
---|
1755 | append_escaped_text (GString *str, |
---|
1756 | const gchar *text, |
---|
1757 | gssize length) |
---|
1758 | { |
---|
1759 | const gchar *p; |
---|
1760 | const gchar *end; |
---|
1761 | |
---|
1762 | p = text; |
---|
1763 | end = text + length; |
---|
1764 | |
---|
1765 | while (p != end) |
---|
1766 | { |
---|
1767 | const gchar *next; |
---|
1768 | next = g_utf8_next_char (p); |
---|
1769 | |
---|
1770 | switch (*p) |
---|
1771 | { |
---|
1772 | case '&': |
---|
1773 | g_string_append (str, "&"); |
---|
1774 | break; |
---|
1775 | |
---|
1776 | case '<': |
---|
1777 | g_string_append (str, "<"); |
---|
1778 | break; |
---|
1779 | |
---|
1780 | case '>': |
---|
1781 | g_string_append (str, ">"); |
---|
1782 | break; |
---|
1783 | |
---|
1784 | case '\'': |
---|
1785 | g_string_append (str, "'"); |
---|
1786 | break; |
---|
1787 | |
---|
1788 | case '"': |
---|
1789 | g_string_append (str, """); |
---|
1790 | break; |
---|
1791 | |
---|
1792 | default: |
---|
1793 | g_string_append_len (str, p, next - p); |
---|
1794 | break; |
---|
1795 | } |
---|
1796 | |
---|
1797 | p = next; |
---|
1798 | } |
---|
1799 | } |
---|
1800 | |
---|
1801 | /** |
---|
1802 | * g_markup_escape_text: |
---|
1803 | * @text: some valid UTF-8 text |
---|
1804 | * @length: length of @text in bytes |
---|
1805 | * |
---|
1806 | * Escapes text so that the markup parser will parse it verbatim. |
---|
1807 | * Less than, greater than, ampersand, etc. are replaced with the |
---|
1808 | * corresponding entities. This function would typically be used |
---|
1809 | * when writing out a file to be parsed with the markup parser. |
---|
1810 | * |
---|
1811 | * Return value: escaped text |
---|
1812 | **/ |
---|
1813 | gchar* |
---|
1814 | g_markup_escape_text (const gchar *text, |
---|
1815 | gssize length) |
---|
1816 | { |
---|
1817 | GString *str; |
---|
1818 | |
---|
1819 | g_return_val_if_fail (text != NULL, NULL); |
---|
1820 | |
---|
1821 | if (length < 0) |
---|
1822 | length = strlen (text); |
---|
1823 | |
---|
1824 | str = g_string_new (""); |
---|
1825 | append_escaped_text (str, text, length); |
---|
1826 | |
---|
1827 | return g_string_free (str, FALSE); |
---|
1828 | } |
---|