1 | /* |
---|
2 | * HTMLtree.c : implementation of access function for an HTML tree. |
---|
3 | * |
---|
4 | * See Copyright for the status of this software. |
---|
5 | * |
---|
6 | * daniel@veillard.com |
---|
7 | */ |
---|
8 | |
---|
9 | |
---|
10 | #define IN_LIBXML |
---|
11 | #include "libxml.h" |
---|
12 | #ifdef LIBXML_HTML_ENABLED |
---|
13 | |
---|
14 | #include <string.h> /* for memset() only ! */ |
---|
15 | |
---|
16 | #ifdef HAVE_CTYPE_H |
---|
17 | #include <ctype.h> |
---|
18 | #endif |
---|
19 | #ifdef HAVE_STDLIB_H |
---|
20 | #include <stdlib.h> |
---|
21 | #endif |
---|
22 | |
---|
23 | #include <libxml/xmlmemory.h> |
---|
24 | #include <libxml/HTMLparser.h> |
---|
25 | #include <libxml/HTMLtree.h> |
---|
26 | #include <libxml/entities.h> |
---|
27 | #include <libxml/valid.h> |
---|
28 | #include <libxml/xmlerror.h> |
---|
29 | #include <libxml/parserInternals.h> |
---|
30 | #include <libxml/globals.h> |
---|
31 | #include <libxml/uri.h> |
---|
32 | |
---|
33 | /************************************************************************ |
---|
34 | * * |
---|
35 | * Getting/Setting encoding meta tags * |
---|
36 | * * |
---|
37 | ************************************************************************/ |
---|
38 | |
---|
39 | /** |
---|
40 | * htmlGetMetaEncoding: |
---|
41 | * @doc: the document |
---|
42 | * |
---|
43 | * Encoding definition lookup in the Meta tags |
---|
44 | * |
---|
45 | * Returns the current encoding as flagged in the HTML source |
---|
46 | */ |
---|
47 | const xmlChar * |
---|
48 | htmlGetMetaEncoding(htmlDocPtr doc) { |
---|
49 | htmlNodePtr cur; |
---|
50 | const xmlChar *content; |
---|
51 | const xmlChar *encoding; |
---|
52 | |
---|
53 | if (doc == NULL) |
---|
54 | return(NULL); |
---|
55 | cur = doc->children; |
---|
56 | |
---|
57 | /* |
---|
58 | * Search the html |
---|
59 | */ |
---|
60 | while (cur != NULL) { |
---|
61 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
---|
62 | if (xmlStrEqual(cur->name, BAD_CAST"html")) |
---|
63 | break; |
---|
64 | if (xmlStrEqual(cur->name, BAD_CAST"head")) |
---|
65 | goto found_head; |
---|
66 | if (xmlStrEqual(cur->name, BAD_CAST"meta")) |
---|
67 | goto found_meta; |
---|
68 | } |
---|
69 | cur = cur->next; |
---|
70 | } |
---|
71 | if (cur == NULL) |
---|
72 | return(NULL); |
---|
73 | cur = cur->children; |
---|
74 | |
---|
75 | /* |
---|
76 | * Search the head |
---|
77 | */ |
---|
78 | while (cur != NULL) { |
---|
79 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
---|
80 | if (xmlStrEqual(cur->name, BAD_CAST"head")) |
---|
81 | break; |
---|
82 | if (xmlStrEqual(cur->name, BAD_CAST"meta")) |
---|
83 | goto found_meta; |
---|
84 | } |
---|
85 | cur = cur->next; |
---|
86 | } |
---|
87 | if (cur == NULL) |
---|
88 | return(NULL); |
---|
89 | found_head: |
---|
90 | cur = cur->children; |
---|
91 | |
---|
92 | /* |
---|
93 | * Search the meta elements |
---|
94 | */ |
---|
95 | found_meta: |
---|
96 | while (cur != NULL) { |
---|
97 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
---|
98 | if (xmlStrEqual(cur->name, BAD_CAST"meta")) { |
---|
99 | xmlAttrPtr attr = cur->properties; |
---|
100 | int http; |
---|
101 | const xmlChar *value; |
---|
102 | |
---|
103 | content = NULL; |
---|
104 | http = 0; |
---|
105 | while (attr != NULL) { |
---|
106 | if ((attr->children != NULL) && |
---|
107 | (attr->children->type == XML_TEXT_NODE) && |
---|
108 | (attr->children->next == NULL)) { |
---|
109 | value = attr->children->content; |
---|
110 | if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) |
---|
111 | && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
---|
112 | http = 1; |
---|
113 | else if ((value != NULL) |
---|
114 | && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) |
---|
115 | content = value; |
---|
116 | if ((http != 0) && (content != NULL)) |
---|
117 | goto found_content; |
---|
118 | } |
---|
119 | attr = attr->next; |
---|
120 | } |
---|
121 | } |
---|
122 | } |
---|
123 | cur = cur->next; |
---|
124 | } |
---|
125 | return(NULL); |
---|
126 | |
---|
127 | found_content: |
---|
128 | encoding = xmlStrstr(content, BAD_CAST"charset="); |
---|
129 | if (encoding == NULL) |
---|
130 | encoding = xmlStrstr(content, BAD_CAST"Charset="); |
---|
131 | if (encoding == NULL) |
---|
132 | encoding = xmlStrstr(content, BAD_CAST"CHARSET="); |
---|
133 | if (encoding != NULL) { |
---|
134 | encoding += 8; |
---|
135 | } else { |
---|
136 | encoding = xmlStrstr(content, BAD_CAST"charset ="); |
---|
137 | if (encoding == NULL) |
---|
138 | encoding = xmlStrstr(content, BAD_CAST"Charset ="); |
---|
139 | if (encoding == NULL) |
---|
140 | encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); |
---|
141 | if (encoding != NULL) |
---|
142 | encoding += 9; |
---|
143 | } |
---|
144 | if (encoding != NULL) { |
---|
145 | while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
---|
146 | } |
---|
147 | return(encoding); |
---|
148 | } |
---|
149 | |
---|
150 | /** |
---|
151 | * htmlSetMetaEncoding: |
---|
152 | * @doc: the document |
---|
153 | * @encoding: the encoding string |
---|
154 | * |
---|
155 | * Sets the current encoding in the Meta tags |
---|
156 | * NOTE: this will not change the document content encoding, just |
---|
157 | * the META flag associated. |
---|
158 | * |
---|
159 | * Returns 0 in case of success and -1 in case of error |
---|
160 | */ |
---|
161 | int |
---|
162 | htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { |
---|
163 | htmlNodePtr cur, meta; |
---|
164 | const xmlChar *content; |
---|
165 | char newcontent[100]; |
---|
166 | |
---|
167 | |
---|
168 | if (doc == NULL) |
---|
169 | return(-1); |
---|
170 | |
---|
171 | if (encoding != NULL) { |
---|
172 | snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", |
---|
173 | (char *)encoding); |
---|
174 | newcontent[sizeof(newcontent) - 1] = 0; |
---|
175 | } |
---|
176 | |
---|
177 | cur = doc->children; |
---|
178 | |
---|
179 | /* |
---|
180 | * Search the html |
---|
181 | */ |
---|
182 | while (cur != NULL) { |
---|
183 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
---|
184 | if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0) |
---|
185 | break; |
---|
186 | if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) |
---|
187 | goto found_head; |
---|
188 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) |
---|
189 | goto found_meta; |
---|
190 | } |
---|
191 | cur = cur->next; |
---|
192 | } |
---|
193 | if (cur == NULL) |
---|
194 | return(-1); |
---|
195 | cur = cur->children; |
---|
196 | |
---|
197 | /* |
---|
198 | * Search the head |
---|
199 | */ |
---|
200 | while (cur != NULL) { |
---|
201 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
---|
202 | if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0) |
---|
203 | break; |
---|
204 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) |
---|
205 | goto found_meta; |
---|
206 | } |
---|
207 | cur = cur->next; |
---|
208 | } |
---|
209 | if (cur == NULL) |
---|
210 | return(-1); |
---|
211 | found_head: |
---|
212 | if (cur->children == NULL) { |
---|
213 | if (encoding == NULL) |
---|
214 | return(0); |
---|
215 | meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); |
---|
216 | xmlAddChild(cur, meta); |
---|
217 | xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); |
---|
218 | xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); |
---|
219 | return(0); |
---|
220 | } |
---|
221 | cur = cur->children; |
---|
222 | |
---|
223 | found_meta: |
---|
224 | if (encoding != NULL) { |
---|
225 | /* |
---|
226 | * Create a new Meta element with the right attributes |
---|
227 | */ |
---|
228 | |
---|
229 | meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); |
---|
230 | xmlAddPrevSibling(cur, meta); |
---|
231 | xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); |
---|
232 | xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); |
---|
233 | } |
---|
234 | |
---|
235 | /* |
---|
236 | * Search and destroy all the remaining the meta elements carrying |
---|
237 | * encoding informations |
---|
238 | */ |
---|
239 | while (cur != NULL) { |
---|
240 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
---|
241 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) { |
---|
242 | xmlAttrPtr attr = cur->properties; |
---|
243 | int http; |
---|
244 | const xmlChar *value; |
---|
245 | |
---|
246 | content = NULL; |
---|
247 | http = 0; |
---|
248 | while (attr != NULL) { |
---|
249 | if ((attr->children != NULL) && |
---|
250 | (attr->children->type == XML_TEXT_NODE) && |
---|
251 | (attr->children->next == NULL)) { |
---|
252 | value = attr->children->content; |
---|
253 | if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) |
---|
254 | && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
---|
255 | http = 1; |
---|
256 | else |
---|
257 | { |
---|
258 | if ((value != NULL) && |
---|
259 | (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) |
---|
260 | content = value; |
---|
261 | } |
---|
262 | if ((http != 0) && (content != NULL)) |
---|
263 | break; |
---|
264 | } |
---|
265 | attr = attr->next; |
---|
266 | } |
---|
267 | if ((http != 0) && (content != NULL)) { |
---|
268 | meta = cur; |
---|
269 | cur = cur->next; |
---|
270 | xmlUnlinkNode(meta); |
---|
271 | xmlFreeNode(meta); |
---|
272 | continue; |
---|
273 | } |
---|
274 | |
---|
275 | } |
---|
276 | } |
---|
277 | cur = cur->next; |
---|
278 | } |
---|
279 | return(0); |
---|
280 | } |
---|
281 | |
---|
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL                        /* end of list marker */
};
---|
295 | |
---|
296 | |
---|
297 | /** |
---|
298 | * htmlIsBooleanAttr: |
---|
299 | * @name: the name of the attribute to check |
---|
300 | * |
---|
301 | * Determine if a given attribute is a boolean attribute. |
---|
302 | * |
---|
303 | * returns: false if the attribute is not boolean, true otherwise. |
---|
304 | */ |
---|
305 | int |
---|
306 | htmlIsBooleanAttr(const xmlChar *name) |
---|
307 | { |
---|
308 | int i = 0; |
---|
309 | |
---|
310 | while (htmlBooleanAttrs[i] != NULL) { |
---|
311 | if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) |
---|
312 | return 1; |
---|
313 | i++; |
---|
314 | } |
---|
315 | return 0; |
---|
316 | } |
---|
317 | |
---|
318 | #ifdef LIBXML_OUTPUT_ENABLED |
---|
319 | /************************************************************************ |
---|
320 | * * |
---|
321 | * Output error handlers * |
---|
322 | * * |
---|
323 | ************************************************************************/ |
---|
324 | /** |
---|
325 | * htmlSaveErrMemory: |
---|
326 | * @extra: extra informations |
---|
327 | * |
---|
328 | * Handle an out of memory condition |
---|
329 | */ |
---|
330 | static void |
---|
331 | htmlSaveErrMemory(const char *extra) |
---|
332 | { |
---|
333 | __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); |
---|
334 | } |
---|
335 | |
---|
336 | /** |
---|
337 | * htmlSaveErr: |
---|
338 | * @code: the error number |
---|
339 | * @node: the location of the error. |
---|
340 | * @extra: extra informations |
---|
341 | * |
---|
342 | * Handle an out of memory condition |
---|
343 | */ |
---|
344 | static void |
---|
345 | htmlSaveErr(int code, xmlNodePtr node, const char *extra) |
---|
346 | { |
---|
347 | const char *msg = NULL; |
---|
348 | |
---|
349 | switch(code) { |
---|
350 | case XML_SAVE_NOT_UTF8: |
---|
351 | msg = "string is not in UTF-8"; |
---|
352 | break; |
---|
353 | case XML_SAVE_CHAR_INVALID: |
---|
354 | msg = "invalid character value"; |
---|
355 | break; |
---|
356 | case XML_SAVE_UNKNOWN_ENCODING: |
---|
357 | msg = "unknown encoding %s"; |
---|
358 | break; |
---|
359 | case XML_SAVE_NO_DOCTYPE: |
---|
360 | msg = "HTML has no DOCTYPE"; |
---|
361 | break; |
---|
362 | default: |
---|
363 | msg = "unexpected error number"; |
---|
364 | } |
---|
365 | __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); |
---|
366 | } |
---|
367 | |
---|
368 | /************************************************************************ |
---|
369 | * * |
---|
370 | * Dumping HTML tree content to a simple buffer * |
---|
371 | * * |
---|
372 | ************************************************************************/ |
---|
373 | |
---|
374 | static int |
---|
375 | htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
---|
376 | int format); |
---|
377 | |
---|
378 | /** |
---|
379 | * htmlNodeDumpFormat: |
---|
380 | * @buf: the HTML buffer output |
---|
381 | * @doc: the document |
---|
382 | * @cur: the current node |
---|
383 | * @format: should formatting spaces been added |
---|
384 | * |
---|
385 | * Dump an HTML node, recursive behaviour,children are printed too. |
---|
386 | * |
---|
387 | * Returns the number of byte written or -1 in case of error |
---|
388 | */ |
---|
389 | static int |
---|
390 | htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
---|
391 | int format) { |
---|
392 | unsigned int use; |
---|
393 | int ret; |
---|
394 | xmlOutputBufferPtr outbuf; |
---|
395 | |
---|
396 | if (cur == NULL) { |
---|
397 | return (-1); |
---|
398 | } |
---|
399 | if (buf == NULL) { |
---|
400 | return (-1); |
---|
401 | } |
---|
402 | outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); |
---|
403 | if (outbuf == NULL) { |
---|
404 | htmlSaveErrMemory("allocating HTML output buffer"); |
---|
405 | return (-1); |
---|
406 | } |
---|
407 | memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); |
---|
408 | outbuf->buffer = buf; |
---|
409 | outbuf->encoder = NULL; |
---|
410 | outbuf->writecallback = NULL; |
---|
411 | outbuf->closecallback = NULL; |
---|
412 | outbuf->context = NULL; |
---|
413 | outbuf->written = 0; |
---|
414 | |
---|
415 | use = buf->use; |
---|
416 | htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); |
---|
417 | xmlFree(outbuf); |
---|
418 | ret = buf->use - use; |
---|
419 | return (ret); |
---|
420 | } |
---|
421 | |
---|
422 | /** |
---|
423 | * htmlNodeDump: |
---|
424 | * @buf: the HTML buffer output |
---|
425 | * @doc: the document |
---|
426 | * @cur: the current node |
---|
427 | * |
---|
428 | * Dump an HTML node, recursive behaviour,children are printed too, |
---|
429 | * and formatting returns are added. |
---|
430 | * |
---|
431 | * Returns the number of byte written or -1 in case of error |
---|
432 | */ |
---|
433 | int |
---|
434 | htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { |
---|
435 | xmlInitParser(); |
---|
436 | |
---|
437 | return(htmlNodeDumpFormat(buf, doc, cur, 1)); |
---|
438 | } |
---|
439 | |
---|
440 | /** |
---|
441 | * htmlNodeDumpFileFormat: |
---|
442 | * @out: the FILE pointer |
---|
443 | * @doc: the document |
---|
444 | * @cur: the current node |
---|
445 | * @encoding: the document encoding |
---|
446 | * @format: should formatting spaces been added |
---|
447 | * |
---|
448 | * Dump an HTML node, recursive behaviour,children are printed too. |
---|
449 | * |
---|
450 | * TODO: if encoding == NULL try to save in the doc encoding |
---|
451 | * |
---|
452 | * returns: the number of byte written or -1 in case of failure. |
---|
453 | */ |
---|
454 | int |
---|
455 | htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, |
---|
456 | xmlNodePtr cur, const char *encoding, int format) { |
---|
457 | xmlOutputBufferPtr buf; |
---|
458 | xmlCharEncodingHandlerPtr handler = NULL; |
---|
459 | int ret; |
---|
460 | |
---|
461 | xmlInitParser(); |
---|
462 | |
---|
463 | if (encoding != NULL) { |
---|
464 | xmlCharEncoding enc; |
---|
465 | |
---|
466 | enc = xmlParseCharEncoding(encoding); |
---|
467 | if (enc != XML_CHAR_ENCODING_UTF8) { |
---|
468 | handler = xmlFindCharEncodingHandler(encoding); |
---|
469 | if (handler == NULL) |
---|
470 | return(-1); |
---|
471 | } |
---|
472 | } |
---|
473 | |
---|
474 | /* |
---|
475 | * Fallback to HTML or ASCII when the encoding is unspecified |
---|
476 | */ |
---|
477 | if (handler == NULL) |
---|
478 | handler = xmlFindCharEncodingHandler("HTML"); |
---|
479 | if (handler == NULL) |
---|
480 | handler = xmlFindCharEncodingHandler("ascii"); |
---|
481 | |
---|
482 | /* |
---|
483 | * save the content to a temp buffer. |
---|
484 | */ |
---|
485 | buf = xmlOutputBufferCreateFile(out, handler); |
---|
486 | if (buf == NULL) return(0); |
---|
487 | |
---|
488 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); |
---|
489 | |
---|
490 | ret = xmlOutputBufferClose(buf); |
---|
491 | return(ret); |
---|
492 | } |
---|
493 | |
---|
494 | /** |
---|
495 | * htmlNodeDumpFile: |
---|
496 | * @out: the FILE pointer |
---|
497 | * @doc: the document |
---|
498 | * @cur: the current node |
---|
499 | * |
---|
500 | * Dump an HTML node, recursive behaviour,children are printed too, |
---|
501 | * and formatting returns are added. |
---|
502 | */ |
---|
503 | void |
---|
504 | htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { |
---|
505 | htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); |
---|
506 | } |
---|
507 | |
---|
508 | /** |
---|
509 | * htmlDocDumpMemory: |
---|
510 | * @cur: the document |
---|
511 | * @mem: OUT: the memory pointer |
---|
512 | * @size: OUT: the memory length |
---|
513 | * |
---|
514 | * Dump an HTML document in memory and return the xmlChar * and it's size. |
---|
515 | * It's up to the caller to free the memory. |
---|
516 | */ |
---|
517 | void |
---|
518 | htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { |
---|
519 | xmlOutputBufferPtr buf; |
---|
520 | xmlCharEncodingHandlerPtr handler = NULL; |
---|
521 | const char *encoding; |
---|
522 | |
---|
523 | xmlInitParser(); |
---|
524 | |
---|
525 | if ((mem == NULL) || (size == NULL)) |
---|
526 | return; |
---|
527 | if (cur == NULL) { |
---|
528 | *mem = NULL; |
---|
529 | *size = 0; |
---|
530 | return; |
---|
531 | } |
---|
532 | |
---|
533 | encoding = (const char *) htmlGetMetaEncoding(cur); |
---|
534 | |
---|
535 | if (encoding != NULL) { |
---|
536 | xmlCharEncoding enc; |
---|
537 | |
---|
538 | enc = xmlParseCharEncoding(encoding); |
---|
539 | if (enc != cur->charset) { |
---|
540 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
---|
541 | /* |
---|
542 | * Not supported yet |
---|
543 | */ |
---|
544 | *mem = NULL; |
---|
545 | *size = 0; |
---|
546 | return; |
---|
547 | } |
---|
548 | |
---|
549 | handler = xmlFindCharEncodingHandler(encoding); |
---|
550 | if (handler == NULL) { |
---|
551 | *mem = NULL; |
---|
552 | *size = 0; |
---|
553 | return; |
---|
554 | } |
---|
555 | } |
---|
556 | } |
---|
557 | |
---|
558 | /* |
---|
559 | * Fallback to HTML or ASCII when the encoding is unspecified |
---|
560 | */ |
---|
561 | if (handler == NULL) |
---|
562 | handler = xmlFindCharEncodingHandler("HTML"); |
---|
563 | if (handler == NULL) |
---|
564 | handler = xmlFindCharEncodingHandler("ascii"); |
---|
565 | |
---|
566 | buf = xmlAllocOutputBuffer(handler); |
---|
567 | if (buf == NULL) { |
---|
568 | *mem = NULL; |
---|
569 | *size = 0; |
---|
570 | return; |
---|
571 | } |
---|
572 | |
---|
573 | htmlDocContentDumpOutput(buf, cur, NULL); |
---|
574 | xmlOutputBufferFlush(buf); |
---|
575 | if (buf->conv != NULL) { |
---|
576 | *size = buf->conv->use; |
---|
577 | *mem = xmlStrndup(buf->conv->content, *size); |
---|
578 | } else { |
---|
579 | *size = buf->buffer->use; |
---|
580 | *mem = xmlStrndup(buf->buffer->content, *size); |
---|
581 | } |
---|
582 | (void)xmlOutputBufferClose(buf); |
---|
583 | } |
---|
584 | |
---|
585 | |
---|
586 | /************************************************************************ |
---|
587 | * * |
---|
588 | * Dumping HTML tree content to an I/O output buffer * |
---|
589 | * * |
---|
590 | ************************************************************************/ |
---|
591 | |
---|
592 | void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); |
---|
593 | |
---|
594 | /** |
---|
595 | * htmlDtdDumpOutput: |
---|
596 | * @buf: the HTML buffer output |
---|
597 | * @doc: the document |
---|
598 | * @encoding: the encoding string |
---|
599 | * |
---|
600 | * TODO: check whether encoding is needed |
---|
601 | * |
---|
602 | * Dump the HTML document DTD, if any. |
---|
603 | */ |
---|
604 | static void |
---|
605 | htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
---|
606 | const char *encoding ATTRIBUTE_UNUSED) { |
---|
607 | xmlDtdPtr cur = doc->intSubset; |
---|
608 | |
---|
609 | if (cur == NULL) { |
---|
610 | htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); |
---|
611 | return; |
---|
612 | } |
---|
613 | xmlOutputBufferWriteString(buf, "<!DOCTYPE "); |
---|
614 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
615 | if (cur->ExternalID != NULL) { |
---|
616 | xmlOutputBufferWriteString(buf, " PUBLIC "); |
---|
617 | xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); |
---|
618 | if (cur->SystemID != NULL) { |
---|
619 | xmlOutputBufferWriteString(buf, " "); |
---|
620 | xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); |
---|
621 | } |
---|
622 | } else if (cur->SystemID != NULL) { |
---|
623 | xmlOutputBufferWriteString(buf, " SYSTEM "); |
---|
624 | xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); |
---|
625 | } |
---|
626 | xmlOutputBufferWriteString(buf, ">\n"); |
---|
627 | } |
---|
628 | |
---|
629 | /** |
---|
630 | * htmlAttrDumpOutput: |
---|
631 | * @buf: the HTML buffer output |
---|
632 | * @doc: the document |
---|
633 | * @cur: the attribute pointer |
---|
634 | * @encoding: the encoding string |
---|
635 | * |
---|
636 | * Dump an HTML attribute |
---|
637 | */ |
---|
638 | static void |
---|
639 | htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, |
---|
640 | const char *encoding ATTRIBUTE_UNUSED) { |
---|
641 | xmlChar *value; |
---|
642 | |
---|
643 | /* |
---|
644 | * TODO: The html output method should not escape a & character |
---|
645 | * occurring in an attribute value immediately followed by |
---|
646 | * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). |
---|
647 | */ |
---|
648 | |
---|
649 | if (cur == NULL) { |
---|
650 | return; |
---|
651 | } |
---|
652 | xmlOutputBufferWriteString(buf, " "); |
---|
653 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
---|
654 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
---|
655 | xmlOutputBufferWriteString(buf, ":"); |
---|
656 | } |
---|
657 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
658 | if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { |
---|
659 | value = xmlNodeListGetString(doc, cur->children, 0); |
---|
660 | if (value) { |
---|
661 | xmlOutputBufferWriteString(buf, "="); |
---|
662 | if ((cur->ns == NULL) && (cur->parent != NULL) && |
---|
663 | (cur->parent->ns == NULL) && |
---|
664 | ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || |
---|
665 | (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || |
---|
666 | (!xmlStrcasecmp(cur->name, BAD_CAST "src")))) { |
---|
667 | xmlChar *escaped; |
---|
668 | xmlChar *tmp = value; |
---|
669 | |
---|
670 | while (IS_BLANK_CH(*tmp)) tmp++; |
---|
671 | |
---|
672 | escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); |
---|
673 | if (escaped != NULL) { |
---|
674 | xmlBufferWriteQuotedString(buf->buffer, escaped); |
---|
675 | xmlFree(escaped); |
---|
676 | } else { |
---|
677 | xmlBufferWriteQuotedString(buf->buffer, value); |
---|
678 | } |
---|
679 | } else { |
---|
680 | xmlBufferWriteQuotedString(buf->buffer, value); |
---|
681 | } |
---|
682 | xmlFree(value); |
---|
683 | } else { |
---|
684 | xmlOutputBufferWriteString(buf, "=\"\""); |
---|
685 | } |
---|
686 | } |
---|
687 | } |
---|
688 | |
---|
689 | /** |
---|
690 | * htmlAttrListDumpOutput: |
---|
691 | * @buf: the HTML buffer output |
---|
692 | * @doc: the document |
---|
693 | * @cur: the first attribute pointer |
---|
694 | * @encoding: the encoding string |
---|
695 | * |
---|
696 | * Dump a list of HTML attributes |
---|
697 | */ |
---|
698 | static void |
---|
699 | htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { |
---|
700 | if (cur == NULL) { |
---|
701 | return; |
---|
702 | } |
---|
703 | while (cur != NULL) { |
---|
704 | htmlAttrDumpOutput(buf, doc, cur, encoding); |
---|
705 | cur = cur->next; |
---|
706 | } |
---|
707 | } |
---|
708 | |
---|
709 | |
---|
710 | |
---|
711 | /** |
---|
712 | * htmlNodeListDumpOutput: |
---|
713 | * @buf: the HTML buffer output |
---|
714 | * @doc: the document |
---|
715 | * @cur: the first node |
---|
716 | * @encoding: the encoding string |
---|
717 | * @format: should formatting spaces been added |
---|
718 | * |
---|
719 | * Dump an HTML node list, recursive behaviour,children are printed too. |
---|
720 | */ |
---|
721 | static void |
---|
722 | htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
---|
723 | xmlNodePtr cur, const char *encoding, int format) { |
---|
724 | if (cur == NULL) { |
---|
725 | return; |
---|
726 | } |
---|
727 | while (cur != NULL) { |
---|
728 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); |
---|
729 | cur = cur->next; |
---|
730 | } |
---|
731 | } |
---|
732 | |
---|
733 | /** |
---|
734 | * htmlNodeDumpFormatOutput: |
---|
735 | * @buf: the HTML buffer output |
---|
736 | * @doc: the document |
---|
737 | * @cur: the current node |
---|
738 | * @encoding: the encoding string |
---|
739 | * @format: should formatting spaces been added |
---|
740 | * |
---|
741 | * Dump an HTML node, recursive behaviour,children are printed too. |
---|
742 | */ |
---|
743 | void |
---|
744 | htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
---|
745 | xmlNodePtr cur, const char *encoding, int format) { |
---|
746 | const htmlElemDesc * info; |
---|
747 | |
---|
748 | xmlInitParser(); |
---|
749 | |
---|
750 | if ((cur == NULL) || (buf == NULL)) { |
---|
751 | return; |
---|
752 | } |
---|
753 | /* |
---|
754 | * Special cases. |
---|
755 | */ |
---|
756 | if (cur->type == XML_DTD_NODE) |
---|
757 | return; |
---|
758 | if ((cur->type == XML_HTML_DOCUMENT_NODE) || |
---|
759 | (cur->type == XML_DOCUMENT_NODE)){ |
---|
760 | htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); |
---|
761 | return; |
---|
762 | } |
---|
763 | if (cur->type == HTML_TEXT_NODE) { |
---|
764 | if (cur->content != NULL) { |
---|
765 | if (((cur->name == (const xmlChar *)xmlStringText) || |
---|
766 | (cur->name != (const xmlChar *)xmlStringTextNoenc)) && |
---|
767 | ((cur->parent == NULL) || |
---|
768 | ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && |
---|
769 | (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { |
---|
770 | xmlChar *buffer; |
---|
771 | |
---|
772 | buffer = xmlEncodeEntitiesReentrant(doc, cur->content); |
---|
773 | if (buffer != NULL) { |
---|
774 | xmlOutputBufferWriteString(buf, (const char *)buffer); |
---|
775 | xmlFree(buffer); |
---|
776 | } |
---|
777 | } else { |
---|
778 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
---|
779 | } |
---|
780 | } |
---|
781 | return; |
---|
782 | } |
---|
783 | if (cur->type == HTML_COMMENT_NODE) { |
---|
784 | if (cur->content != NULL) { |
---|
785 | xmlOutputBufferWriteString(buf, "<!--"); |
---|
786 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
---|
787 | xmlOutputBufferWriteString(buf, "-->"); |
---|
788 | } |
---|
789 | return; |
---|
790 | } |
---|
791 | if (cur->type == HTML_PI_NODE) { |
---|
792 | if (cur->name == NULL) |
---|
793 | return; |
---|
794 | xmlOutputBufferWriteString(buf, "<?"); |
---|
795 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
796 | if (cur->content != NULL) { |
---|
797 | xmlOutputBufferWriteString(buf, " "); |
---|
798 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
---|
799 | } |
---|
800 | xmlOutputBufferWriteString(buf, ">"); |
---|
801 | return; |
---|
802 | } |
---|
803 | if (cur->type == HTML_ENTITY_REF_NODE) { |
---|
804 | xmlOutputBufferWriteString(buf, "&"); |
---|
805 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
806 | xmlOutputBufferWriteString(buf, ";"); |
---|
807 | return; |
---|
808 | } |
---|
809 | if (cur->type == HTML_PRESERVE_NODE) { |
---|
810 | if (cur->content != NULL) { |
---|
811 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
---|
812 | } |
---|
813 | return; |
---|
814 | } |
---|
815 | |
---|
816 | /* |
---|
817 | * Get specific HTML info for that node. |
---|
818 | */ |
---|
819 | if (cur->ns == NULL) |
---|
820 | info = htmlTagLookup(cur->name); |
---|
821 | else |
---|
822 | info = NULL; |
---|
823 | |
---|
824 | xmlOutputBufferWriteString(buf, "<"); |
---|
825 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
---|
826 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
---|
827 | xmlOutputBufferWriteString(buf, ":"); |
---|
828 | } |
---|
829 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
830 | if (cur->nsDef) |
---|
831 | xmlNsListDumpOutput(buf, cur->nsDef); |
---|
832 | if (cur->properties != NULL) |
---|
833 | htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); |
---|
834 | |
---|
835 | if ((info != NULL) && (info->empty)) { |
---|
836 | xmlOutputBufferWriteString(buf, ">"); |
---|
837 | if ((format) && (!info->isinline) && (cur->next != NULL)) { |
---|
838 | if ((cur->next->type != HTML_TEXT_NODE) && |
---|
839 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
---|
840 | (cur->parent != NULL) && |
---|
841 | (cur->parent->name != NULL) && |
---|
842 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
---|
843 | xmlOutputBufferWriteString(buf, "\n"); |
---|
844 | } |
---|
845 | return; |
---|
846 | } |
---|
847 | if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && |
---|
848 | (cur->children == NULL)) { |
---|
849 | if ((info != NULL) && (info->saveEndTag != 0) && |
---|
850 | (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && |
---|
851 | (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { |
---|
852 | xmlOutputBufferWriteString(buf, ">"); |
---|
853 | } else { |
---|
854 | xmlOutputBufferWriteString(buf, "></"); |
---|
855 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
---|
856 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
---|
857 | xmlOutputBufferWriteString(buf, ":"); |
---|
858 | } |
---|
859 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
860 | xmlOutputBufferWriteString(buf, ">"); |
---|
861 | } |
---|
862 | if ((format) && (cur->next != NULL) && |
---|
863 | (info != NULL) && (!info->isinline)) { |
---|
864 | if ((cur->next->type != HTML_TEXT_NODE) && |
---|
865 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
---|
866 | (cur->parent != NULL) && |
---|
867 | (cur->parent->name != NULL) && |
---|
868 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
---|
869 | xmlOutputBufferWriteString(buf, "\n"); |
---|
870 | } |
---|
871 | return; |
---|
872 | } |
---|
873 | xmlOutputBufferWriteString(buf, ">"); |
---|
874 | if ((cur->type != XML_ELEMENT_NODE) && |
---|
875 | (cur->content != NULL)) { |
---|
876 | /* |
---|
877 | * Uses the OutputBuffer property to automatically convert |
---|
878 | * invalids to charrefs |
---|
879 | */ |
---|
880 | |
---|
881 | xmlOutputBufferWriteString(buf, (const char *) cur->content); |
---|
882 | } |
---|
883 | if (cur->children != NULL) { |
---|
884 | if ((format) && (info != NULL) && (!info->isinline) && |
---|
885 | (cur->children->type != HTML_TEXT_NODE) && |
---|
886 | (cur->children->type != HTML_ENTITY_REF_NODE) && |
---|
887 | (cur->children != cur->last) && |
---|
888 | (cur->name != NULL) && |
---|
889 | (cur->name[0] != 'p')) /* p, pre, param */ |
---|
890 | xmlOutputBufferWriteString(buf, "\n"); |
---|
891 | htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); |
---|
892 | if ((format) && (info != NULL) && (!info->isinline) && |
---|
893 | (cur->last->type != HTML_TEXT_NODE) && |
---|
894 | (cur->last->type != HTML_ENTITY_REF_NODE) && |
---|
895 | (cur->children != cur->last) && |
---|
896 | (cur->name != NULL) && |
---|
897 | (cur->name[0] != 'p')) /* p, pre, param */ |
---|
898 | xmlOutputBufferWriteString(buf, "\n"); |
---|
899 | } |
---|
900 | xmlOutputBufferWriteString(buf, "</"); |
---|
901 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
---|
902 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
---|
903 | xmlOutputBufferWriteString(buf, ":"); |
---|
904 | } |
---|
905 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
---|
906 | xmlOutputBufferWriteString(buf, ">"); |
---|
907 | if ((format) && (info != NULL) && (!info->isinline) && |
---|
908 | (cur->next != NULL)) { |
---|
909 | if ((cur->next->type != HTML_TEXT_NODE) && |
---|
910 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
---|
911 | (cur->parent != NULL) && |
---|
912 | (cur->parent->name != NULL) && |
---|
913 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
---|
914 | xmlOutputBufferWriteString(buf, "\n"); |
---|
915 | } |
---|
916 | } |
---|
917 | |
---|
918 | /** |
---|
919 | * htmlNodeDumpOutput: |
---|
920 | * @buf: the HTML buffer output |
---|
921 | * @doc: the document |
---|
922 | * @cur: the current node |
---|
923 | * @encoding: the encoding string |
---|
924 | * |
---|
925 | * Dump an HTML node, recursive behaviour,children are printed too, |
---|
926 | * and formatting returns/spaces are added. |
---|
927 | */ |
---|
928 | void |
---|
929 | htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
---|
930 | xmlNodePtr cur, const char *encoding) { |
---|
931 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); |
---|
932 | } |
---|
933 | |
---|
934 | /** |
---|
935 | * htmlDocContentDumpFormatOutput: |
---|
936 | * @buf: the HTML buffer output |
---|
937 | * @cur: the document |
---|
938 | * @encoding: the encoding string |
---|
939 | * @format: should formatting spaces been added |
---|
940 | * |
---|
941 | * Dump an HTML document. |
---|
942 | */ |
---|
943 | void |
---|
944 | htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, |
---|
945 | const char *encoding, int format) { |
---|
946 | int type; |
---|
947 | |
---|
948 | xmlInitParser(); |
---|
949 | |
---|
950 | if ((buf == NULL) || (cur == NULL)) |
---|
951 | return; |
---|
952 | |
---|
953 | /* |
---|
954 | * force to output the stuff as HTML, especially for entities |
---|
955 | */ |
---|
956 | type = cur->type; |
---|
957 | cur->type = XML_HTML_DOCUMENT_NODE; |
---|
958 | if (cur->intSubset != NULL) { |
---|
959 | htmlDtdDumpOutput(buf, cur, NULL); |
---|
960 | } |
---|
961 | if (cur->children != NULL) { |
---|
962 | htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); |
---|
963 | } |
---|
964 | xmlOutputBufferWriteString(buf, "\n"); |
---|
965 | cur->type = (xmlElementType) type; |
---|
966 | } |
---|
967 | |
---|
968 | /** |
---|
969 | * htmlDocContentDumpOutput: |
---|
970 | * @buf: the HTML buffer output |
---|
971 | * @cur: the document |
---|
972 | * @encoding: the encoding string |
---|
973 | * |
---|
974 | * Dump an HTML document. Formating return/spaces are added. |
---|
975 | */ |
---|
976 | void |
---|
977 | htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, |
---|
978 | const char *encoding) { |
---|
979 | htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); |
---|
980 | } |
---|
981 | |
---|
982 | /************************************************************************ |
---|
983 | * * |
---|
984 | * Saving functions front-ends * |
---|
985 | * * |
---|
986 | ************************************************************************/ |
---|
987 | |
---|
988 | /** |
---|
989 | * htmlDocDump: |
---|
990 | * @f: the FILE* |
---|
991 | * @cur: the document |
---|
992 | * |
---|
993 | * Dump an HTML document to an open FILE. |
---|
994 | * |
---|
995 | * returns: the number of byte written or -1 in case of failure. |
---|
996 | */ |
---|
997 | int |
---|
998 | htmlDocDump(FILE *f, xmlDocPtr cur) { |
---|
999 | xmlOutputBufferPtr buf; |
---|
1000 | xmlCharEncodingHandlerPtr handler = NULL; |
---|
1001 | const char *encoding; |
---|
1002 | int ret; |
---|
1003 | |
---|
1004 | xmlInitParser(); |
---|
1005 | |
---|
1006 | if ((cur == NULL) || (f == NULL)) { |
---|
1007 | return(-1); |
---|
1008 | } |
---|
1009 | |
---|
1010 | encoding = (const char *) htmlGetMetaEncoding(cur); |
---|
1011 | |
---|
1012 | if (encoding != NULL) { |
---|
1013 | xmlCharEncoding enc; |
---|
1014 | |
---|
1015 | enc = xmlParseCharEncoding(encoding); |
---|
1016 | if (enc != cur->charset) { |
---|
1017 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
---|
1018 | /* |
---|
1019 | * Not supported yet |
---|
1020 | */ |
---|
1021 | return(-1); |
---|
1022 | } |
---|
1023 | |
---|
1024 | handler = xmlFindCharEncodingHandler(encoding); |
---|
1025 | if (handler == NULL) |
---|
1026 | return(-1); |
---|
1027 | } |
---|
1028 | } |
---|
1029 | |
---|
1030 | /* |
---|
1031 | * Fallback to HTML or ASCII when the encoding is unspecified |
---|
1032 | */ |
---|
1033 | if (handler == NULL) |
---|
1034 | handler = xmlFindCharEncodingHandler("HTML"); |
---|
1035 | if (handler == NULL) |
---|
1036 | handler = xmlFindCharEncodingHandler("ascii"); |
---|
1037 | |
---|
1038 | buf = xmlOutputBufferCreateFile(f, handler); |
---|
1039 | if (buf == NULL) return(-1); |
---|
1040 | htmlDocContentDumpOutput(buf, cur, NULL); |
---|
1041 | |
---|
1042 | ret = xmlOutputBufferClose(buf); |
---|
1043 | return(ret); |
---|
1044 | } |
---|
1045 | |
---|
1046 | /** |
---|
1047 | * htmlSaveFile: |
---|
1048 | * @filename: the filename (or URL) |
---|
1049 | * @cur: the document |
---|
1050 | * |
---|
1051 | * Dump an HTML document to a file. If @filename is "-" the stdout file is |
---|
1052 | * used. |
---|
1053 | * returns: the number of byte written or -1 in case of failure. |
---|
1054 | */ |
---|
1055 | int |
---|
1056 | htmlSaveFile(const char *filename, xmlDocPtr cur) { |
---|
1057 | xmlOutputBufferPtr buf; |
---|
1058 | xmlCharEncodingHandlerPtr handler = NULL; |
---|
1059 | const char *encoding; |
---|
1060 | int ret; |
---|
1061 | |
---|
1062 | if ((cur == NULL) || (filename == NULL)) |
---|
1063 | return(-1); |
---|
1064 | |
---|
1065 | xmlInitParser(); |
---|
1066 | |
---|
1067 | encoding = (const char *) htmlGetMetaEncoding(cur); |
---|
1068 | |
---|
1069 | if (encoding != NULL) { |
---|
1070 | xmlCharEncoding enc; |
---|
1071 | |
---|
1072 | enc = xmlParseCharEncoding(encoding); |
---|
1073 | if (enc != cur->charset) { |
---|
1074 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
---|
1075 | /* |
---|
1076 | * Not supported yet |
---|
1077 | */ |
---|
1078 | return(-1); |
---|
1079 | } |
---|
1080 | |
---|
1081 | handler = xmlFindCharEncodingHandler(encoding); |
---|
1082 | if (handler == NULL) |
---|
1083 | return(-1); |
---|
1084 | } |
---|
1085 | } |
---|
1086 | |
---|
1087 | /* |
---|
1088 | * Fallback to HTML or ASCII when the encoding is unspecified |
---|
1089 | */ |
---|
1090 | if (handler == NULL) |
---|
1091 | handler = xmlFindCharEncodingHandler("HTML"); |
---|
1092 | if (handler == NULL) |
---|
1093 | handler = xmlFindCharEncodingHandler("ascii"); |
---|
1094 | |
---|
1095 | /* |
---|
1096 | * save the content to a temp buffer. |
---|
1097 | */ |
---|
1098 | buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); |
---|
1099 | if (buf == NULL) return(0); |
---|
1100 | |
---|
1101 | htmlDocContentDumpOutput(buf, cur, NULL); |
---|
1102 | |
---|
1103 | ret = xmlOutputBufferClose(buf); |
---|
1104 | return(ret); |
---|
1105 | } |
---|
1106 | |
---|
1107 | /** |
---|
1108 | * htmlSaveFileFormat: |
---|
1109 | * @filename: the filename |
---|
1110 | * @cur: the document |
---|
1111 | * @format: should formatting spaces been added |
---|
1112 | * @encoding: the document encoding |
---|
1113 | * |
---|
1114 | * Dump an HTML document to a file using a given encoding. |
---|
1115 | * |
---|
1116 | * returns: the number of byte written or -1 in case of failure. |
---|
1117 | */ |
---|
1118 | int |
---|
1119 | htmlSaveFileFormat(const char *filename, xmlDocPtr cur, |
---|
1120 | const char *encoding, int format) { |
---|
1121 | xmlOutputBufferPtr buf; |
---|
1122 | xmlCharEncodingHandlerPtr handler = NULL; |
---|
1123 | int ret; |
---|
1124 | |
---|
1125 | if ((cur == NULL) || (filename == NULL)) |
---|
1126 | return(-1); |
---|
1127 | |
---|
1128 | xmlInitParser(); |
---|
1129 | |
---|
1130 | if (encoding != NULL) { |
---|
1131 | xmlCharEncoding enc; |
---|
1132 | |
---|
1133 | enc = xmlParseCharEncoding(encoding); |
---|
1134 | if (enc != cur->charset) { |
---|
1135 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
---|
1136 | /* |
---|
1137 | * Not supported yet |
---|
1138 | */ |
---|
1139 | return(-1); |
---|
1140 | } |
---|
1141 | |
---|
1142 | handler = xmlFindCharEncodingHandler(encoding); |
---|
1143 | if (handler == NULL) |
---|
1144 | return(-1); |
---|
1145 | htmlSetMetaEncoding(cur, (const xmlChar *) encoding); |
---|
1146 | } |
---|
1147 | } else { |
---|
1148 | htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8"); |
---|
1149 | } |
---|
1150 | |
---|
1151 | /* |
---|
1152 | * Fallback to HTML or ASCII when the encoding is unspecified |
---|
1153 | */ |
---|
1154 | if (handler == NULL) |
---|
1155 | handler = xmlFindCharEncodingHandler("HTML"); |
---|
1156 | if (handler == NULL) |
---|
1157 | handler = xmlFindCharEncodingHandler("ascii"); |
---|
1158 | |
---|
1159 | /* |
---|
1160 | * save the content to a temp buffer. |
---|
1161 | */ |
---|
1162 | buf = xmlOutputBufferCreateFilename(filename, handler, 0); |
---|
1163 | if (buf == NULL) return(0); |
---|
1164 | |
---|
1165 | htmlDocContentDumpFormatOutput(buf, cur, encoding, format); |
---|
1166 | |
---|
1167 | ret = xmlOutputBufferClose(buf); |
---|
1168 | return(ret); |
---|
1169 | } |
---|
1170 | |
---|
1171 | /** |
---|
1172 | * htmlSaveFileEnc: |
---|
1173 | * @filename: the filename |
---|
1174 | * @cur: the document |
---|
1175 | * @encoding: the document encoding |
---|
1176 | * |
---|
1177 | * Dump an HTML document to a file using a given encoding |
---|
1178 | * and formatting returns/spaces are added. |
---|
1179 | * |
---|
1180 | * returns: the number of byte written or -1 in case of failure. |
---|
1181 | */ |
---|
1182 | int |
---|
1183 | htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { |
---|
1184 | return(htmlSaveFileFormat(filename, cur, encoding, 1)); |
---|
1185 | } |
---|
1186 | |
---|
1187 | #endif /* LIBXML_OUTPUT_ENABLED */ |
---|
1188 | |
---|
1189 | #endif /* LIBXML_HTML_ENABLED */ |
---|