1 | /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ |
---|
2 | /* This file is part of the GtkHTML library. |
---|
3 | |
---|
4 | Copyright (C) 1998 World Wide Web Consortium |
---|
5 | Copyright (C) 2000 Helix Code, Inc. |
---|
6 | |
---|
7 | This library is free software; you can redistribute it and/or |
---|
8 | modify it under the terms of the GNU Library General Public |
---|
9 | License as published by the Free Software Foundation; either |
---|
10 | version 2 of the License, or (at your option) any later version. |
---|
11 | |
---|
12 | This library is distributed in the hope that it will be useful, |
---|
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
15 | Library General Public License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Library General Public License |
---|
18 | along with this library; see the file COPYING.LIB. If not, write to |
---|
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
---|
20 | Boston, MA 02111-1307, USA. |
---|
21 | |
---|
22 | Author: Ettore Perazzoli <ettore@helixcode.com> |
---|
23 | `encode_entities()' adapted from gnome-xml by Daniel Veillard |
---|
24 | <Daniel.Veillard@w3.org>. |
---|
25 | */ |
---|
26 | |
---|
27 | #include <config.h> |
---|
28 | #include <string.h> |
---|
29 | |
---|
30 | #include "config.h" |
---|
31 | #include "htmlcolor.h" |
---|
32 | #include "htmlengine.h" |
---|
33 | #include "htmlimage.h" |
---|
34 | #include "htmlentity.h" |
---|
35 | #include "htmlengine-save.h" |
---|
36 | #include "htmlsettings.h" |
---|
37 | |
---|
38 | #include "gtkhtmldebug.h" |
---|
39 | |
---|
40 | |
---|
41 | /* This routine was originally written by Daniel Velliard, (C) 1998 World Wide |
---|
42 | Web Consortium. */ |
---|
43 | gchar * |
---|
44 | html_encode_entities (const gchar *input, guint len, guint *encoded_len_return) |
---|
45 | { |
---|
46 | gunichar uc; |
---|
47 | const gchar *p; |
---|
48 | guchar *buffer = NULL; |
---|
49 | guchar *out = NULL; |
---|
50 | gint buffer_size = 0; |
---|
51 | guint count; |
---|
52 | |
---|
53 | /* Allocate an translation buffer. */ |
---|
54 | buffer_size = 1000; |
---|
55 | buffer = g_malloc (buffer_size); |
---|
56 | |
---|
57 | out = buffer; |
---|
58 | p = input; |
---|
59 | count = 0; |
---|
60 | |
---|
61 | while (p && *p && count < len) { |
---|
62 | if (out - buffer > buffer_size - 100) { |
---|
63 | gint index = out - buffer; |
---|
64 | |
---|
65 | buffer_size *= 2; |
---|
66 | buffer = g_realloc (buffer, buffer_size); |
---|
67 | out = &buffer[index]; |
---|
68 | } |
---|
69 | uc = g_utf8_get_char (p); |
---|
70 | |
---|
71 | /* By default one have to encode at least '<', '>', '"' and '&'. */ |
---|
72 | |
---|
73 | if (uc == '<') { |
---|
74 | *out++ = '&'; |
---|
75 | *out++ = 'l'; |
---|
76 | *out++ = 't'; |
---|
77 | *out++ = ';'; |
---|
78 | } else if (uc == '>') { |
---|
79 | *out++ = '&'; |
---|
80 | *out++ = 'g'; |
---|
81 | *out++ = 't'; |
---|
82 | *out++ = ';'; |
---|
83 | } else if (uc == '&') { |
---|
84 | *out++ = '&'; |
---|
85 | *out++ = 'a'; |
---|
86 | *out++ = 'm'; |
---|
87 | *out++ = 'p'; |
---|
88 | *out++ = ';'; |
---|
89 | } else if (uc == '"') { |
---|
90 | *out++ = '&'; |
---|
91 | *out++ = 'q'; |
---|
92 | *out++ = 'u'; |
---|
93 | *out++ = 'o'; |
---|
94 | *out++ = 't'; |
---|
95 | *out++ = ';'; |
---|
96 | } else if (uc == ENTITY_NBSP) { |
---|
97 | *out++ = '&'; |
---|
98 | *out++ = 'n'; |
---|
99 | *out++ = 'b'; |
---|
100 | *out++ = 's'; |
---|
101 | *out++ = 'p'; |
---|
102 | *out++ = ';'; |
---|
103 | } else if (((uc >= 0x20) && (uc < 0x80)) |
---|
104 | || (uc == '\n') || (uc == '\r') || (uc == '\t')) { |
---|
105 | /* Default case, just copy. */ |
---|
106 | *out++ = uc; |
---|
107 | } else { |
---|
108 | char buf[10], *ptr; |
---|
109 | |
---|
110 | g_snprintf(buf, 9, "&#%d;", uc); |
---|
111 | |
---|
112 | ptr = buf; |
---|
113 | while (*ptr != 0) |
---|
114 | *out++ = *ptr++; |
---|
115 | } |
---|
116 | |
---|
117 | count++; |
---|
118 | p = g_utf8_next_char (p); |
---|
119 | } |
---|
120 | |
---|
121 | *out = 0; |
---|
122 | if (encoded_len_return) |
---|
123 | *encoded_len_return = out - buffer; |
---|
124 | |
---|
125 | return buffer; |
---|
126 | } |
---|
127 | |
---|
128 | gboolean |
---|
129 | html_engine_save_encode (HTMLEngineSaveState *state, |
---|
130 | const gchar *buffer, |
---|
131 | guint length) |
---|
132 | { |
---|
133 | guchar *encoded_buffer; |
---|
134 | guint encoded_length; |
---|
135 | gboolean success; |
---|
136 | |
---|
137 | g_return_val_if_fail (state != NULL, FALSE); |
---|
138 | g_return_val_if_fail (buffer != NULL, FALSE); |
---|
139 | |
---|
140 | if (length == 0) |
---|
141 | return TRUE; |
---|
142 | |
---|
143 | encoded_buffer = html_encode_entities ((const guchar *) buffer, length, &encoded_length); |
---|
144 | success = state->receiver (state->engine, encoded_buffer, encoded_length, state->user_data); |
---|
145 | |
---|
146 | g_free (encoded_buffer); |
---|
147 | return success; |
---|
148 | } |
---|
149 | |
---|
150 | gboolean |
---|
151 | html_engine_save_encode_string (HTMLEngineSaveState *state, |
---|
152 | const gchar *s) |
---|
153 | { |
---|
154 | guint len; |
---|
155 | |
---|
156 | g_return_val_if_fail (state != NULL, FALSE); |
---|
157 | g_return_val_if_fail (s != NULL, FALSE); |
---|
158 | |
---|
159 | len = strlen (s); |
---|
160 | |
---|
161 | return html_engine_save_encode (state, s, len); |
---|
162 | } |
---|
163 | |
---|
164 | gboolean |
---|
165 | html_engine_save_output_stringv (HTMLEngineSaveState *state, |
---|
166 | const char *format, |
---|
167 | va_list ap) |
---|
168 | { |
---|
169 | char *string; |
---|
170 | gboolean retval; |
---|
171 | |
---|
172 | string = g_strdup_vprintf (format, ap); |
---|
173 | retval = state->receiver (state->engine, string, strlen (string), state->user_data); |
---|
174 | g_free (string); |
---|
175 | |
---|
176 | return retval; |
---|
177 | } |
---|
178 | |
---|
179 | gboolean |
---|
180 | html_engine_save_output_string (HTMLEngineSaveState *state, |
---|
181 | const gchar *format, |
---|
182 | ...) |
---|
183 | { |
---|
184 | va_list args; |
---|
185 | gboolean retval; |
---|
186 | |
---|
187 | g_return_val_if_fail (format != NULL, FALSE); |
---|
188 | g_return_val_if_fail (state != NULL, FALSE); |
---|
189 | |
---|
190 | va_start (args, format); |
---|
191 | retval = html_engine_save_output_stringv (state, format, args); |
---|
192 | va_end (args); |
---|
193 | |
---|
194 | return retval; |
---|
195 | } |
---|
196 | |
---|
197 | gboolean |
---|
198 | html_engine_save_output_buffer (HTMLEngineSaveState *state, const gchar *buffer, int bytes) |
---|
199 | { |
---|
200 | if (bytes == -1) |
---|
201 | bytes = strlen (buffer); |
---|
202 | return state->receiver (state->engine, buffer, bytes, state->user_data); |
---|
203 | } |
---|
204 | |
---|
205 | |
---|
206 | static gchar * |
---|
207 | color_to_string (gchar *s, HTMLColor *c) |
---|
208 | { |
---|
209 | gchar color [20]; |
---|
210 | |
---|
211 | g_snprintf (color, 20, " %s=\"#%02x%02x%02x\"", s, c->color.red >> 8, c->color.green >> 8, c->color.blue >> 8); |
---|
212 | return g_strdup (color); |
---|
213 | } |
---|
214 | |
---|
215 | static gchar * |
---|
216 | get_body (HTMLEngine *e) |
---|
217 | { |
---|
218 | HTMLColorSet *cset; |
---|
219 | gchar *body; |
---|
220 | gchar *text; |
---|
221 | gchar *bg; |
---|
222 | gchar *bg_image; |
---|
223 | gchar *link; |
---|
224 | gchar *lm, *rm, *tm, *bm; |
---|
225 | gchar *url = NULL; |
---|
226 | |
---|
227 | cset = e->settings->color_set; |
---|
228 | text = (cset->changed [HTMLTextColor]) ? color_to_string ("TEXT", cset->color [HTMLTextColor]) : g_strdup (""); |
---|
229 | link = (cset->changed [HTMLLinkColor]) ? color_to_string ("LINK", cset->color [HTMLLinkColor]) : g_strdup (""); |
---|
230 | bg = (cset->changed [HTMLBgColor]) ? color_to_string ("BGCOLOR", cset->color [HTMLBgColor]) : g_strdup (""); |
---|
231 | bg_image = e->bgPixmapPtr ? g_strdup_printf (" BACKGROUND=\"%s\"", |
---|
232 | url = html_image_resolve_image_url |
---|
233 | (e->widget, ((HTMLImagePointer *) e->bgPixmapPtr)->url)) |
---|
234 | : g_strdup (""); |
---|
235 | g_free (url); |
---|
236 | |
---|
237 | lm = e->leftBorder != LEFT_BORDER ? g_strdup_printf (" LEFTMARGIN=\"%d\"", e->leftBorder) : g_strdup (""); |
---|
238 | rm = e->rightBorder != RIGHT_BORDER ? g_strdup_printf (" RIGHTMARGIN=\"%d\"", e->rightBorder) : g_strdup (""); |
---|
239 | tm = e->topBorder != TOP_BORDER ? g_strdup_printf (" TOPMARGIN=\"%d\"", e->topBorder) : g_strdup (""); |
---|
240 | bm = e->bottomBorder != BOTTOM_BORDER ? g_strdup_printf (" BOTTOMMARGIN=\"%d\"", e->bottomBorder) : g_strdup (""); |
---|
241 | |
---|
242 | body = g_strconcat ("<BODY", text, link, bg, bg_image, lm, rm, tm, bm, ">\n", NULL); |
---|
243 | |
---|
244 | g_free (lm); |
---|
245 | g_free (rm); |
---|
246 | g_free (tm); |
---|
247 | g_free (bm); |
---|
248 | g_free (text); |
---|
249 | g_free (link); |
---|
250 | g_free (bg); |
---|
251 | g_free (bg_image); |
---|
252 | |
---|
253 | return body; |
---|
254 | } |
---|
255 | |
---|
256 | static gboolean |
---|
257 | write_header (HTMLEngineSaveState *state) |
---|
258 | { |
---|
259 | gboolean retval = TRUE; |
---|
260 | gchar *body; |
---|
261 | |
---|
262 | html_engine_clear_all_class_data (state->engine); |
---|
263 | /* Preface. */ |
---|
264 | if (! html_engine_save_output_string |
---|
265 | (state, |
---|
266 | "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 TRANSITIONAL//EN\">\n" |
---|
267 | "<HTML>\n")) |
---|
268 | return FALSE; |
---|
269 | |
---|
270 | /* Header start. FIXME: `GENERATOR' string? */ |
---|
271 | if (! html_engine_save_output_string |
---|
272 | (state, |
---|
273 | "<HEAD>\n" |
---|
274 | " <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; CHARSET=UTF-8\">\n" |
---|
275 | " <META NAME=\"GENERATOR\" CONTENT=\"GtkHTML/%s\">\n", VERSION)) |
---|
276 | return FALSE; |
---|
277 | |
---|
278 | /* Title. */ |
---|
279 | if (state->engine->title != NULL |
---|
280 | && state->engine->title->str != NULL |
---|
281 | && state->engine->title->str[0] != '\0') { |
---|
282 | if (! html_engine_save_output_string (state, " <TITLE>") |
---|
283 | || ! html_engine_save_encode_string (state, state->engine->title->str) |
---|
284 | || ! html_engine_save_output_string (state, "</TITLE>\n")) |
---|
285 | return FALSE; |
---|
286 | } |
---|
287 | |
---|
288 | /* End of header. */ |
---|
289 | if (! html_engine_save_output_string (state, "</HEAD>\n")) |
---|
290 | return FALSE; |
---|
291 | |
---|
292 | /* Start of body. */ |
---|
293 | body = get_body (state->engine); |
---|
294 | if (!html_engine_save_output_string (state, "%s", body)) |
---|
295 | retval = FALSE; |
---|
296 | g_free (body); |
---|
297 | |
---|
298 | return retval; |
---|
299 | } |
---|
300 | |
---|
301 | static gboolean |
---|
302 | write_end (HTMLEngineSaveState *state) |
---|
303 | { |
---|
304 | if (! html_engine_save_output_string (state, "</BODY>\n</HTML>\n")) |
---|
305 | return FALSE; |
---|
306 | |
---|
307 | html_engine_clear_all_class_data (state->engine); |
---|
308 | |
---|
309 | return TRUE; |
---|
310 | } |
---|
311 | |
---|
312 | gboolean |
---|
313 | html_engine_save (HTMLEngine *engine, |
---|
314 | HTMLEngineSaveReceiverFn receiver, |
---|
315 | gpointer user_data) |
---|
316 | { |
---|
317 | HTMLEngineSaveState state; |
---|
318 | |
---|
319 | if (engine->clue == NULL) { |
---|
320 | /* Empty document. */ |
---|
321 | return FALSE; |
---|
322 | } |
---|
323 | |
---|
324 | /* gtk_html_debug_dump_tree_simple (engine->clue, 1); */ |
---|
325 | |
---|
326 | state.engine = engine; |
---|
327 | state.receiver = receiver; |
---|
328 | state.br_count = 0; |
---|
329 | state.error = FALSE; |
---|
330 | state.inline_frames = FALSE; |
---|
331 | state.user_data = user_data; |
---|
332 | state.last_level = 0; |
---|
333 | |
---|
334 | if (! write_header (&state)) |
---|
335 | return FALSE; |
---|
336 | |
---|
337 | html_object_save (engine->clue, &state); |
---|
338 | if (state.error) |
---|
339 | return FALSE; |
---|
340 | |
---|
341 | if (! write_end (&state)) |
---|
342 | return FALSE; |
---|
343 | |
---|
344 | return TRUE; |
---|
345 | } |
---|
346 | |
---|
347 | gboolean |
---|
348 | html_engine_save_plain (HTMLEngine *engine, |
---|
349 | HTMLEngineSaveReceiverFn receiver, |
---|
350 | gpointer user_data) |
---|
351 | { |
---|
352 | HTMLEngineSaveState state; |
---|
353 | |
---|
354 | if (engine->clue == NULL) { |
---|
355 | /* Empty document. */ |
---|
356 | return FALSE; |
---|
357 | } |
---|
358 | |
---|
359 | /* gtk_html_debug_dump_tree_simple (engine->clue, 1); */ |
---|
360 | |
---|
361 | state.engine = engine; |
---|
362 | state.receiver = receiver; |
---|
363 | state.br_count = 0; |
---|
364 | state.error = FALSE; |
---|
365 | state.inline_frames = FALSE; |
---|
366 | state.user_data = user_data; |
---|
367 | state.last_level = 0; |
---|
368 | |
---|
369 | /* FIXME don't hardcode the length */ |
---|
370 | html_object_save_plain (engine->clue, &state, 72); |
---|
371 | if (state.error) |
---|
372 | return FALSE; |
---|
373 | |
---|
374 | return TRUE; |
---|
375 | } |
---|
376 | |
---|
377 | static gboolean |
---|
378 | html_engine_save_buffer_receiver (const HTMLEngine *engine, |
---|
379 | const gchar *data, |
---|
380 | guint len, |
---|
381 | gpointer user_data) |
---|
382 | { |
---|
383 | g_string_append ((GString *)user_data, (gchar *)data); |
---|
384 | |
---|
385 | return TRUE; |
---|
386 | } |
---|
387 | |
---|
388 | void |
---|
389 | html_engine_save_buffer_free (HTMLEngineSaveState *state) |
---|
390 | { |
---|
391 | GString *string; |
---|
392 | |
---|
393 | g_return_if_fail (state != NULL); |
---|
394 | string = (GString *)state->user_data; |
---|
395 | |
---|
396 | g_string_free (string, TRUE); |
---|
397 | |
---|
398 | g_free (state); |
---|
399 | } |
---|
400 | |
---|
401 | guchar * |
---|
402 | html_engine_save_buffer_peek_text (HTMLEngineSaveState *state) |
---|
403 | { |
---|
404 | GString *string; |
---|
405 | |
---|
406 | g_return_val_if_fail (state != NULL, NULL); |
---|
407 | string = (GString *)state->user_data; |
---|
408 | |
---|
409 | return string->str; |
---|
410 | } |
---|
411 | |
---|
412 | int |
---|
413 | html_engine_save_buffer_peek_text_bytes (HTMLEngineSaveState *state) |
---|
414 | { |
---|
415 | GString *string; |
---|
416 | |
---|
417 | g_return_val_if_fail (state != NULL, 0); |
---|
418 | string = (GString *)state->user_data; |
---|
419 | |
---|
420 | return string->len; |
---|
421 | } |
---|
422 | |
---|
423 | HTMLEngineSaveState * |
---|
424 | html_engine_save_buffer_new (HTMLEngine *engine, gboolean inline_frames) |
---|
425 | { |
---|
426 | HTMLEngineSaveState *state = g_new0 (HTMLEngineSaveState, 1); |
---|
427 | |
---|
428 | if (state) { |
---|
429 | state->engine = engine; |
---|
430 | state->receiver = (HTMLEngineSaveReceiverFn)html_engine_save_buffer_receiver; |
---|
431 | state->br_count = 0; |
---|
432 | state->error = FALSE; |
---|
433 | state->inline_frames = inline_frames; |
---|
434 | state->user_data = (gpointer) g_string_new (""); |
---|
435 | state->last_level = 0; |
---|
436 | } |
---|
437 | |
---|
438 | return state; |
---|
439 | } |
---|
440 | |
---|
441 | gchar * |
---|
442 | html_engine_save_get_sample_body (HTMLEngine *e, |
---|
443 | HTMLObject *o) |
---|
444 | { |
---|
445 | return get_body (e); |
---|
446 | } |
---|
447 | |
---|
448 | const gchar * |
---|
449 | html_engine_save_get_paragraph_style (GtkHTMLParagraphStyle style) |
---|
450 | { |
---|
451 | switch (style) { |
---|
452 | case GTK_HTML_PARAGRAPH_STYLE_NORMAL: |
---|
453 | return NULL; |
---|
454 | case GTK_HTML_PARAGRAPH_STYLE_H1: |
---|
455 | return "h1"; |
---|
456 | case GTK_HTML_PARAGRAPH_STYLE_H2: |
---|
457 | return "h2"; |
---|
458 | case GTK_HTML_PARAGRAPH_STYLE_H3: |
---|
459 | return "h3"; |
---|
460 | case GTK_HTML_PARAGRAPH_STYLE_H4: |
---|
461 | return "h4"; |
---|
462 | case GTK_HTML_PARAGRAPH_STYLE_H5: |
---|
463 | return "h5"; |
---|
464 | case GTK_HTML_PARAGRAPH_STYLE_H6: |
---|
465 | return "h6"; |
---|
466 | case GTK_HTML_PARAGRAPH_STYLE_ADDRESS: |
---|
467 | return "address"; |
---|
468 | case GTK_HTML_PARAGRAPH_STYLE_PRE: |
---|
469 | return "pre"; |
---|
470 | case GTK_HTML_PARAGRAPH_STYLE_ITEMDOTTED: |
---|
471 | case GTK_HTML_PARAGRAPH_STYLE_ITEMROMAN: |
---|
472 | case GTK_HTML_PARAGRAPH_STYLE_ITEMDIGIT: |
---|
473 | case GTK_HTML_PARAGRAPH_STYLE_ITEMALPHA: |
---|
474 | return "li"; |
---|
475 | } |
---|
476 | |
---|
477 | g_warning ("Unknown GtkHTMLParagraphStyle %d", style); |
---|
478 | |
---|
479 | return NULL; |
---|
480 | } |
---|
481 | |
---|
482 | const gchar * |
---|
483 | html_engine_save_get_paragraph_align (GtkHTMLParagraphAlignment align) |
---|
484 | { |
---|
485 | switch (align) { |
---|
486 | case GTK_HTML_PARAGRAPH_ALIGNMENT_RIGHT: |
---|
487 | return "right"; |
---|
488 | case GTK_HTML_PARAGRAPH_ALIGNMENT_CENTER: |
---|
489 | return "center"; |
---|
490 | case GTK_HTML_PARAGRAPH_ALIGNMENT_LEFT: |
---|
491 | return "left"; |
---|
492 | } |
---|
493 | |
---|
494 | g_warning ("Unknown GtkHTMLParagraphAlignment %d", align); |
---|
495 | |
---|
496 | return NULL; |
---|
497 | } |
---|
498 | |
---|
499 | gint |
---|
500 | html_engine_save_string_append_nonbsp (GString *out, const guchar *s, guint length) |
---|
501 | { |
---|
502 | guint len = length; |
---|
503 | |
---|
504 | while (len--) { |
---|
505 | if (IS_UTF8_NBSP (s)) { |
---|
506 | g_string_append_c (out, ' '); |
---|
507 | s += 2; |
---|
508 | len--; |
---|
509 | } else { |
---|
510 | g_string_append_c (out, *s); |
---|
511 | s++; |
---|
512 | } |
---|
513 | } |
---|
514 | return length; |
---|
515 | } |
---|