Context Navigation

source: trunk/third/libxml2/HTMLtree.c @ 20735

Visit:

Revision 20735, 29.9 KB checked in by ghudson, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20734, which included commits to RCS files with non-trunk default branches.

Line
1	/*
2	* HTMLtree.c : implementation of access function for an HTML tree.
3	*
4	* See Copyright for the status of this software.
5	*
6	* daniel@veillard.com
7	*/
8
9
10	#define IN_LIBXML
11	#include "libxml.h"
12	#ifdef LIBXML_HTML_ENABLED
13
14	#include <string.h> /* for memset() only ! */
15
16	#ifdef HAVE_CTYPE_H
17	#include <ctype.h>
18	#endif
19	#ifdef HAVE_STDLIB_H
20	#include <stdlib.h>
21	#endif
22
23	#include <libxml/xmlmemory.h>
24	#include <libxml/HTMLparser.h>
25	#include <libxml/HTMLtree.h>
26	#include <libxml/entities.h>
27	#include <libxml/valid.h>
28	#include <libxml/xmlerror.h>
29	#include <libxml/parserInternals.h>
30	#include <libxml/globals.h>
31	#include <libxml/uri.h>
32
33	/************************************************************************
34	* *
35	* Getting/Setting encoding meta tags *
36	* *
37	************************************************************************/
38
39	/**
40	* htmlGetMetaEncoding:
41	* @doc: the document
42	*
43	* Encoding definition lookup in the Meta tags
44	*
45	* Returns the current encoding as flagged in the HTML source
46	*/
47	const xmlChar *
48	htmlGetMetaEncoding(htmlDocPtr doc) {
49	htmlNodePtr cur;
50	const xmlChar *content;
51	const xmlChar *encoding;
52
53	if (doc == NULL)
54	return(NULL);
55	cur = doc->children;
56
57	/*
58	* Search the html
59	*/
60	while (cur != NULL) {
61	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62	if (xmlStrEqual(cur->name, BAD_CAST"html"))
63	break;
64	if (xmlStrEqual(cur->name, BAD_CAST"head"))
65	goto found_head;
66	if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67	goto found_meta;
68	}
69	cur = cur->next;
70	}
71	if (cur == NULL)
72	return(NULL);
73	cur = cur->children;
74
75	/*
76	* Search the head
77	*/
78	while (cur != NULL) {
79	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80	if (xmlStrEqual(cur->name, BAD_CAST"head"))
81	break;
82	if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83	goto found_meta;
84	}
85	cur = cur->next;
86	}
87	if (cur == NULL)
88	return(NULL);
89	found_head:
90	cur = cur->children;
91
92	/*
93	* Search the meta elements
94	*/
95	found_meta:
96	while (cur != NULL) {
97	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98	if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99	xmlAttrPtr attr = cur->properties;
100	int http;
101	const xmlChar *value;
102
103	content = NULL;
104	http = 0;
105	while (attr != NULL) {
106	if ((attr->children != NULL) &&
107	(attr->children->type == XML_TEXT_NODE) &&
108	(attr->children->next == NULL)) {
109	value = attr->children->content;
110	if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112	http = 1;
113	else if ((value != NULL)
114	&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115	content = value;
116	if ((http != 0) && (content != NULL))
117	goto found_content;
118	}
119	attr = attr->next;
120	}
121	}
122	}
123	cur = cur->next;
124	}
125	return(NULL);
126
127	found_content:
128	encoding = xmlStrstr(content, BAD_CAST"charset=");
129	if (encoding == NULL)
130	encoding = xmlStrstr(content, BAD_CAST"Charset=");
131	if (encoding == NULL)
132	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133	if (encoding != NULL) {
134	encoding += 8;
135	} else {
136	encoding = xmlStrstr(content, BAD_CAST"charset =");
137	if (encoding == NULL)
138	encoding = xmlStrstr(content, BAD_CAST"Charset =");
139	if (encoding == NULL)
140	encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141	if (encoding != NULL)
142	encoding += 9;
143	}
144	if (encoding != NULL) {
145	while ((encoding == ' ') \|\| (encoding == '\t')) encoding++;
146	}
147	return(encoding);
148	}
149
150	/**
151	* htmlSetMetaEncoding:
152	* @doc: the document
153	* @encoding: the encoding string
154	*
155	* Sets the current encoding in the Meta tags
156	* NOTE: this will not change the document content encoding, just
157	* the META flag associated.
158	*
159	* Returns 0 in case of success and -1 in case of error
160	*/
161	int
162	htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163	htmlNodePtr cur, meta;
164	const xmlChar *content;
165	char newcontent[100];
166
167
168	if (doc == NULL)
169	return(-1);
170
171	if (encoding != NULL) {
172	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
173	encoding);
174	newcontent[sizeof(newcontent) - 1] = 0;
175	}
176
177	cur = doc->children;
178
179	/*
180	* Search the html
181	*/
182	while (cur != NULL) {
183	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
184	if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
185	break;
186	if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
187	goto found_head;
188	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
189	goto found_meta;
190	}
191	cur = cur->next;
192	}
193	if (cur == NULL)
194	return(-1);
195	cur = cur->children;
196
197	/*
198	* Search the head
199	*/
200	while (cur != NULL) {
201	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
202	if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
203	break;
204	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
205	goto found_meta;
206	}
207	cur = cur->next;
208	}
209	if (cur == NULL)
210	return(-1);
211	found_head:
212	if (cur->children == NULL) {
213	if (encoding == NULL)
214	return(0);
215	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
216	xmlAddChild(cur, meta);
217	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
218	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
219	return(0);
220	}
221	cur = cur->children;
222
223	found_meta:
224	if (encoding != NULL) {
225	/*
226	* Create a new Meta element with the right attributes
227	*/
228
229	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
230	xmlAddPrevSibling(cur, meta);
231	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
232	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
233	}
234
235	/*
236	* Search and destroy all the remaining the meta elements carrying
237	* encoding informations
238	*/
239	while (cur != NULL) {
240	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
241	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
242	xmlAttrPtr attr = cur->properties;
243	int http;
244	const xmlChar *value;
245
246	content = NULL;
247	http = 0;
248	while (attr != NULL) {
249	if ((attr->children != NULL) &&
250	(attr->children->type == XML_TEXT_NODE) &&
251	(attr->children->next == NULL)) {
252	value = attr->children->content;
253	if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
254	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
255	http = 1;
256	else
257	{
258	if ((value != NULL) &&
259	(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
260	content = value;
261	}
262	if ((http != 0) && (content != NULL))
263	break;
264	}
265	attr = attr->next;
266	}
267	if ((http != 0) && (content != NULL)) {
268	meta = cur;
269	cur = cur->next;
270	xmlUnlinkNode(meta);
271	xmlFreeNode(meta);
272	continue;
273	}
274
275	}
276	}
277	cur = cur->next;
278	}
279	return(0);
280	}
281
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char *htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
295
296
297	/**
298	* htmlIsBooleanAttr:
299	* @name: the name of the attribute to check
300	*
301	* Determine if a given attribute is a boolean attribute.
302	*
303	* returns: false if the attribute is not boolean, true otherwise.
304	*/
305	int
306	htmlIsBooleanAttr(const xmlChar *name)
307	{
308	int i = 0;
309
310	while (htmlBooleanAttrs[i] != NULL) {
311	if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
312	return 1;
313	i++;
314	}
315	return 0;
316	}
317
318	#ifdef LIBXML_OUTPUT_ENABLED
319	/************************************************************************
320	* *
321	* Output error handlers *
322	* *
323	************************************************************************/
324	/**
325	* htmlSaveErrMemory:
326	* @extra: extra informations
327	*
328	* Handle an out of memory condition
329	*/
330	static void
331	htmlSaveErrMemory(const char *extra)
332	{
333	__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
334	}
335
336	/**
337	* htmlSaveErr:
338	* @code: the error number
339	* @node: the location of the error.
340	* @extra: extra informations
341	*
342	* Handle an out of memory condition
343	*/
344	static void
345	htmlSaveErr(int code, xmlNodePtr node, const char *extra)
346	{
347	const char *msg = NULL;
348
349	switch(code) {
350	case XML_SAVE_NOT_UTF8:
351	msg = "string is not in UTF-8";
352	break;
353	case XML_SAVE_CHAR_INVALID:
354	msg = "invalid character value";
355	break;
356	case XML_SAVE_UNKNOWN_ENCODING:
357	msg = "unknown encoding %s";
358	break;
359	case XML_SAVE_NO_DOCTYPE:
360	msg = "HTML has no DOCTYPE";
361	break;
362	default:
363	msg = "unexpected error number";
364	}
365	__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
366	}
367
368	/************************************************************************
369	* *
370	* Dumping HTML tree content to a simple buffer *
371	* *
372	************************************************************************/
373
374	static int
375	htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
376	int format);
377
378	/**
379	* htmlNodeDumpFormat:
380	* @buf: the HTML buffer output
381	* @doc: the document
382	* @cur: the current node
383	* @format: should formatting spaces been added
384	*
385	* Dump an HTML node, recursive behaviour,children are printed too.
386	*
387	* Returns the number of byte written or -1 in case of error
388	*/
389	static int
390	htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
391	int format) {
392	unsigned int use;
393	int ret;
394	xmlOutputBufferPtr outbuf;
395
396	if (cur == NULL) {
397	return (-1);
398	}
399	if (buf == NULL) {
400	return (-1);
401	}
402	outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
403	if (outbuf == NULL) {
404	htmlSaveErrMemory("allocating HTML output buffer");
405	return (-1);
406	}
407	memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
408	outbuf->buffer = buf;
409	outbuf->encoder = NULL;
410	outbuf->writecallback = NULL;
411	outbuf->closecallback = NULL;
412	outbuf->context = NULL;
413	outbuf->written = 0;
414
415	use = buf->use;
416	htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
417	xmlFree(outbuf);
418	ret = buf->use - use;
419	return (ret);
420	}
421
422	/**
423	* htmlNodeDump:
424	* @buf: the HTML buffer output
425	* @doc: the document
426	* @cur: the current node
427	*
428	* Dump an HTML node, recursive behaviour,children are printed too,
429	* and formatting returns are added.
430	*
431	* Returns the number of byte written or -1 in case of error
432	*/
433	int
434	htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
435	xmlInitParser();
436
437	return(htmlNodeDumpFormat(buf, doc, cur, 1));
438	}
439
440	/**
441	* htmlNodeDumpFileFormat:
442	* @out: the FILE pointer
443	* @doc: the document
444	* @cur: the current node
445	* @encoding: the document encoding
446	* @format: should formatting spaces been added
447	*
448	* Dump an HTML node, recursive behaviour,children are printed too.
449	*
450	* TODO: if encoding == NULL try to save in the doc encoding
451	*
452	* returns: the number of byte written or -1 in case of failure.
453	*/
454	int
455	htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
456	xmlNodePtr cur, const char *encoding, int format) {
457	xmlOutputBufferPtr buf;
458	xmlCharEncodingHandlerPtr handler = NULL;
459	int ret;
460
461	xmlInitParser();
462
463	if (encoding != NULL) {
464	xmlCharEncoding enc;
465
466	enc = xmlParseCharEncoding(encoding);
467	if (enc != XML_CHAR_ENCODING_UTF8) {
468	handler = xmlFindCharEncodingHandler(encoding);
469	if (handler == NULL)
470	return(-1);
471	}
472	}
473
474	/*
475	* Fallback to HTML or ASCII when the encoding is unspecified
476	*/
477	if (handler == NULL)
478	handler = xmlFindCharEncodingHandler("HTML");
479	if (handler == NULL)
480	handler = xmlFindCharEncodingHandler("ascii");
481
482	/*
483	* save the content to a temp buffer.
484	*/
485	buf = xmlOutputBufferCreateFile(out, handler);
486	if (buf == NULL) return(0);
487
488	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
489
490	ret = xmlOutputBufferClose(buf);
491	return(ret);
492	}
493
494	/**
495	* htmlNodeDumpFile:
496	* @out: the FILE pointer
497	* @doc: the document
498	* @cur: the current node
499	*
500	* Dump an HTML node, recursive behaviour,children are printed too,
501	* and formatting returns are added.
502	*/
503	void
504	htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
505	htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
506	}
507
508	/**
509	* htmlDocDumpMemory:
510	* @cur: the document
511	* @mem: OUT: the memory pointer
512	* @size: OUT: the memory length
513	*
514	* Dump an HTML document in memory and return the xmlChar * and it's size.
515	* It's up to the caller to free the memory.
516	*/
517	void
518	htmlDocDumpMemory(xmlDocPtr cur, xmlChar*mem, int size) {
519	xmlOutputBufferPtr buf;
520	xmlCharEncodingHandlerPtr handler = NULL;
521	const char *encoding;
522
523	xmlInitParser();
524
525	if (cur == NULL) {
526	*mem = NULL;
527	*size = 0;
528	return;
529	}
530
531	encoding = (const char *) htmlGetMetaEncoding(cur);
532
533	if (encoding != NULL) {
534	xmlCharEncoding enc;
535
536	enc = xmlParseCharEncoding(encoding);
537	if (enc != cur->charset) {
538	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
539	/*
540	* Not supported yet
541	*/
542	*mem = NULL;
543	*size = 0;
544	return;
545	}
546
547	handler = xmlFindCharEncodingHandler(encoding);
548	if (handler == NULL) {
549	*mem = NULL;
550	*size = 0;
551	return;
552	}
553	}
554	}
555
556	/*
557	* Fallback to HTML or ASCII when the encoding is unspecified
558	*/
559	if (handler == NULL)
560	handler = xmlFindCharEncodingHandler("HTML");
561	if (handler == NULL)
562	handler = xmlFindCharEncodingHandler("ascii");
563
564	buf = xmlAllocOutputBuffer(handler);
565	if (buf == NULL) {
566	*mem = NULL;
567	*size = 0;
568	return;
569	}
570
571	htmlDocContentDumpOutput(buf, cur, NULL);
572	xmlOutputBufferFlush(buf);
573	if (buf->conv != NULL) {
574	*size = buf->conv->use;
575	mem = xmlStrndup(buf->conv->content, size);
576	} else {
577	*size = buf->buffer->use;
578	mem = xmlStrndup(buf->buffer->content, size);
579	}
580	(void)xmlOutputBufferClose(buf);
581	}
582
583
584	/************************************************************************
585	* *
586	* Dumping HTML tree content to an I/O output buffer *
587	* *
588	************************************************************************/
589
590	void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
591
592	/**
593	* htmlDtdDumpOutput:
594	* @buf: the HTML buffer output
595	* @doc: the document
596	* @encoding: the encoding string
597	*
598	* TODO: check whether encoding is needed
599	*
600	* Dump the HTML document DTD, if any.
601	*/
602	static void
603	htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
604	const char *encoding ATTRIBUTE_UNUSED) {
605	xmlDtdPtr cur = doc->intSubset;
606
607	if (cur == NULL) {
608	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
609	return;
610	}
611	xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
612	xmlOutputBufferWriteString(buf, (const char *)cur->name);
613	if (cur->ExternalID != NULL) {
614	xmlOutputBufferWriteString(buf, " PUBLIC ");
615	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
616	if (cur->SystemID != NULL) {
617	xmlOutputBufferWriteString(buf, " ");
618	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
619	}
620	} else if (cur->SystemID != NULL) {
621	xmlOutputBufferWriteString(buf, " SYSTEM ");
622	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
623	}
624	xmlOutputBufferWriteString(buf, ">\n");
625	}
626
627	/**
628	* htmlAttrDumpOutput:
629	* @buf: the HTML buffer output
630	* @doc: the document
631	* @cur: the attribute pointer
632	* @encoding: the encoding string
633	*
634	* Dump an HTML attribute
635	*/
636	static void
637	htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
638	const char *encoding ATTRIBUTE_UNUSED) {
639	xmlChar *value;
640
641	/*
642	* TODO: The html output method should not escape a & character
643	* occurring in an attribute value immediately followed by
644	* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
645	*/
646
647	if (cur == NULL) {
648	return;
649	}
650	xmlOutputBufferWriteString(buf, " ");
651	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
652	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
653	xmlOutputBufferWriteString(buf, ":");
654	}
655	xmlOutputBufferWriteString(buf, (const char *)cur->name);
656	if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
657	value = xmlNodeListGetString(doc, cur->children, 0);
658	if (value) {
659	xmlOutputBufferWriteString(buf, "=");
660	if ((cur->ns == NULL) && (cur->parent != NULL) &&
661	(cur->parent->ns == NULL) &&
662	((!xmlStrcasecmp(cur->name, BAD_CAST "href")) \|\|
663	(!xmlStrcasecmp(cur->name, BAD_CAST "action")) \|\|
664	(!xmlStrcasecmp(cur->name, BAD_CAST "src")))) {
665	xmlChar *escaped;
666	xmlChar *tmp = value;
667
668	while (IS_BLANK_CH(*tmp)) tmp++;
669
670	escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
671	if (escaped != NULL) {
672	xmlBufferWriteQuotedString(buf->buffer, escaped);
673	xmlFree(escaped);
674	} else {
675	xmlBufferWriteQuotedString(buf->buffer, value);
676	}
677	} else {
678	xmlBufferWriteQuotedString(buf->buffer, value);
679	}
680	xmlFree(value);
681	} else {
682	xmlOutputBufferWriteString(buf, "=\"\"");
683	}
684	}
685	}
686
687	/**
688	* htmlAttrListDumpOutput:
689	* @buf: the HTML buffer output
690	* @doc: the document
691	* @cur: the first attribute pointer
692	* @encoding: the encoding string
693	*
694	* Dump a list of HTML attributes
695	*/
696	static void
697	htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
698	if (cur == NULL) {
699	return;
700	}
701	while (cur != NULL) {
702	htmlAttrDumpOutput(buf, doc, cur, encoding);
703	cur = cur->next;
704	}
705	}
706
707
708
709	/**
710	* htmlNodeListDumpOutput:
711	* @buf: the HTML buffer output
712	* @doc: the document
713	* @cur: the first node
714	* @encoding: the encoding string
715	* @format: should formatting spaces been added
716	*
717	* Dump an HTML node list, recursive behaviour,children are printed too.
718	*/
719	static void
720	htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
721	xmlNodePtr cur, const char *encoding, int format) {
722	if (cur == NULL) {
723	return;
724	}
725	while (cur != NULL) {
726	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
727	cur = cur->next;
728	}
729	}
730
731	/**
732	* htmlNodeDumpFormatOutput:
733	* @buf: the HTML buffer output
734	* @doc: the document
735	* @cur: the current node
736	* @encoding: the encoding string
737	* @format: should formatting spaces been added
738	*
739	* Dump an HTML node, recursive behaviour,children are printed too.
740	*/
741	void
742	htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
743	xmlNodePtr cur, const char *encoding, int format) {
744	const htmlElemDesc * info;
745
746	xmlInitParser();
747
748	if (cur == NULL) {
749	return;
750	}
751	/*
752	* Special cases.
753	*/
754	if (cur->type == XML_DTD_NODE)
755	return;
756	if (cur->type == XML_HTML_DOCUMENT_NODE) {
757	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
758	return;
759	}
760	if (cur->type == HTML_TEXT_NODE) {
761	if (cur->content != NULL) {
762	if (((cur->name == (const xmlChar *)xmlStringText) \|\|
763	(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
764	((cur->parent == NULL) \|\|
765	((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
766	(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
767	xmlChar *buffer;
768
769	buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
770	if (buffer != NULL) {
771	xmlOutputBufferWriteString(buf, (const char *)buffer);
772	xmlFree(buffer);
773	}
774	} else {
775	xmlOutputBufferWriteString(buf, (const char *)cur->content);
776	}
777	}
778	return;
779	}
780	if (cur->type == HTML_COMMENT_NODE) {
781	if (cur->content != NULL) {
782	xmlOutputBufferWriteString(buf, "<!--");
783	xmlOutputBufferWriteString(buf, (const char *)cur->content);
784	xmlOutputBufferWriteString(buf, "-->");
785	}
786	return;
787	}
788	if (cur->type == HTML_PI_NODE) {
789	if (cur->name == NULL)
790	return;
791	xmlOutputBufferWriteString(buf, "<?");
792	xmlOutputBufferWriteString(buf, (const char *)cur->name);
793	if (cur->content != NULL) {
794	xmlOutputBufferWriteString(buf, " ");
795	xmlOutputBufferWriteString(buf, (const char *)cur->content);
796	}
797	xmlOutputBufferWriteString(buf, ">");
798	return;
799	}
800	if (cur->type == HTML_ENTITY_REF_NODE) {
801	xmlOutputBufferWriteString(buf, "&");
802	xmlOutputBufferWriteString(buf, (const char *)cur->name);
803	xmlOutputBufferWriteString(buf, ";");
804	return;
805	}
806	if (cur->type == HTML_PRESERVE_NODE) {
807	if (cur->content != NULL) {
808	xmlOutputBufferWriteString(buf, (const char *)cur->content);
809	}
810	return;
811	}
812
813	/*
814	* Get specific HTML info for that node.
815	*/
816	if (cur->ns == NULL)
817	info = htmlTagLookup(cur->name);
818	else
819	info = NULL;
820
821	xmlOutputBufferWriteString(buf, "<");
822	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
823	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
824	xmlOutputBufferWriteString(buf, ":");
825	}
826	xmlOutputBufferWriteString(buf, (const char *)cur->name);
827	if (cur->nsDef)
828	xmlNsListDumpOutput(buf, cur->nsDef);
829	if (cur->properties != NULL)
830	htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
831
832	if ((info != NULL) && (info->empty)) {
833	xmlOutputBufferWriteString(buf, ">");
834	if ((format) && (!info->isinline) && (cur->next != NULL)) {
835	if ((cur->next->type != HTML_TEXT_NODE) &&
836	(cur->next->type != HTML_ENTITY_REF_NODE) &&
837	(cur->parent != NULL) &&
838	(cur->parent->name != NULL) &&
839	(cur->parent->name[0] != 'p')) /* p, pre, param */
840	xmlOutputBufferWriteString(buf, "\n");
841	}
842	return;
843	}
844	if (((cur->type == XML_ELEMENT_NODE) \|\| (cur->content == NULL)) &&
845	(cur->children == NULL)) {
846	if ((info != NULL) && (info->saveEndTag != 0) &&
847	(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
848	(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
849	xmlOutputBufferWriteString(buf, ">");
850	} else {
851	xmlOutputBufferWriteString(buf, "></");
852	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
853	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
854	xmlOutputBufferWriteString(buf, ":");
855	}
856	xmlOutputBufferWriteString(buf, (const char *)cur->name);
857	xmlOutputBufferWriteString(buf, ">");
858	}
859	if ((format) && (cur->next != NULL) &&
860	(info != NULL) && (!info->isinline)) {
861	if ((cur->next->type != HTML_TEXT_NODE) &&
862	(cur->next->type != HTML_ENTITY_REF_NODE) &&
863	(cur->parent != NULL) &&
864	(cur->parent->name != NULL) &&
865	(cur->parent->name[0] != 'p')) /* p, pre, param */
866	xmlOutputBufferWriteString(buf, "\n");
867	}
868	return;
869	}
870	xmlOutputBufferWriteString(buf, ">");
871	if ((cur->type != XML_ELEMENT_NODE) &&
872	(cur->content != NULL)) {
873	/*
874	* Uses the OutputBuffer property to automatically convert
875	* invalids to charrefs
876	*/
877
878	xmlOutputBufferWriteString(buf, (const char *) cur->content);
879	}
880	if (cur->children != NULL) {
881	if ((format) && (info != NULL) && (!info->isinline) &&
882	(cur->children->type != HTML_TEXT_NODE) &&
883	(cur->children->type != HTML_ENTITY_REF_NODE) &&
884	(cur->children != cur->last) &&
885	(cur->name != NULL) &&
886	(cur->name[0] != 'p')) /* p, pre, param */
887	xmlOutputBufferWriteString(buf, "\n");
888	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
889	if ((format) && (info != NULL) && (!info->isinline) &&
890	(cur->last->type != HTML_TEXT_NODE) &&
891	(cur->last->type != HTML_ENTITY_REF_NODE) &&
892	(cur->children != cur->last) &&
893	(cur->name != NULL) &&
894	(cur->name[0] != 'p')) /* p, pre, param */
895	xmlOutputBufferWriteString(buf, "\n");
896	}
897	xmlOutputBufferWriteString(buf, "</");
898	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
899	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
900	xmlOutputBufferWriteString(buf, ":");
901	}
902	xmlOutputBufferWriteString(buf, (const char *)cur->name);
903	xmlOutputBufferWriteString(buf, ">");
904	if ((format) && (info != NULL) && (!info->isinline) &&
905	(cur->next != NULL)) {
906	if ((cur->next->type != HTML_TEXT_NODE) &&
907	(cur->next->type != HTML_ENTITY_REF_NODE) &&
908	(cur->parent != NULL) &&
909	(cur->parent->name != NULL) &&
910	(cur->parent->name[0] != 'p')) /* p, pre, param */
911	xmlOutputBufferWriteString(buf, "\n");
912	}
913	}
914
915	/**
916	* htmlNodeDumpOutput:
917	* @buf: the HTML buffer output
918	* @doc: the document
919	* @cur: the current node
920	* @encoding: the encoding string
921	*
922	* Dump an HTML node, recursive behaviour,children are printed too,
923	* and formatting returns/spaces are added.
924	*/
925	void
926	htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
927	xmlNodePtr cur, const char *encoding) {
928	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
929	}
930
931	/**
932	* htmlDocContentDumpFormatOutput:
933	* @buf: the HTML buffer output
934	* @cur: the document
935	* @encoding: the encoding string
936	* @format: should formatting spaces been added
937	*
938	* Dump an HTML document.
939	*/
940	void
941	htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
942	const char *encoding, int format) {
943	int type;
944
945	xmlInitParser();
946
947	/*
948	* force to output the stuff as HTML, especially for entities
949	*/
950	type = cur->type;
951	cur->type = XML_HTML_DOCUMENT_NODE;
952	if (cur->intSubset != NULL) {
953	htmlDtdDumpOutput(buf, cur, NULL);
954	}
955	if (cur->children != NULL) {
956	htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
957	}
958	xmlOutputBufferWriteString(buf, "\n");
959	cur->type = (xmlElementType) type;
960	}
961
962	/**
963	* htmlDocContentDumpOutput:
964	* @buf: the HTML buffer output
965	* @cur: the document
966	* @encoding: the encoding string
967	*
968	* Dump an HTML document. Formating return/spaces are added.
969	*/
970	void
971	htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
972	const char *encoding) {
973	htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
974	}
975
976	/************************************************************************
977	* *
978	* Saving functions front-ends *
979	* *
980	************************************************************************/
981
982	/**
983	* htmlDocDump:
984	* @f: the FILE*
985	* @cur: the document
986	*
987	* Dump an HTML document to an open FILE.
988	*
989	* returns: the number of byte written or -1 in case of failure.
990	*/
991	int
992	htmlDocDump(FILE *f, xmlDocPtr cur) {
993	xmlOutputBufferPtr buf;
994	xmlCharEncodingHandlerPtr handler = NULL;
995	const char *encoding;
996	int ret;
997
998	xmlInitParser();
999
1000	if (cur == NULL) {
1001	return(-1);
1002	}
1003
1004	encoding = (const char *) htmlGetMetaEncoding(cur);
1005
1006	if (encoding != NULL) {
1007	xmlCharEncoding enc;
1008
1009	enc = xmlParseCharEncoding(encoding);
1010	if (enc != cur->charset) {
1011	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1012	/*
1013	* Not supported yet
1014	*/
1015	return(-1);
1016	}
1017
1018	handler = xmlFindCharEncodingHandler(encoding);
1019	if (handler == NULL)
1020	return(-1);
1021	}
1022	}
1023
1024	/*
1025	* Fallback to HTML or ASCII when the encoding is unspecified
1026	*/
1027	if (handler == NULL)
1028	handler = xmlFindCharEncodingHandler("HTML");
1029	if (handler == NULL)
1030	handler = xmlFindCharEncodingHandler("ascii");
1031
1032	buf = xmlOutputBufferCreateFile(f, handler);
1033	if (buf == NULL) return(-1);
1034	htmlDocContentDumpOutput(buf, cur, NULL);
1035
1036	ret = xmlOutputBufferClose(buf);
1037	return(ret);
1038	}
1039
1040	/**
1041	* htmlSaveFile:
1042	* @filename: the filename (or URL)
1043	* @cur: the document
1044	*
1045	* Dump an HTML document to a file. If @filename is "-" the stdout file is
1046	* used.
1047	* returns: the number of byte written or -1 in case of failure.
1048	*/
1049	int
1050	htmlSaveFile(const char *filename, xmlDocPtr cur) {
1051	xmlOutputBufferPtr buf;
1052	xmlCharEncodingHandlerPtr handler = NULL;
1053	const char *encoding;
1054	int ret;
1055
1056	xmlInitParser();
1057
1058	encoding = (const char *) htmlGetMetaEncoding(cur);
1059
1060	if (encoding != NULL) {
1061	xmlCharEncoding enc;
1062
1063	enc = xmlParseCharEncoding(encoding);
1064	if (enc != cur->charset) {
1065	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1066	/*
1067	* Not supported yet
1068	*/
1069	return(-1);
1070	}
1071
1072	handler = xmlFindCharEncodingHandler(encoding);
1073	if (handler == NULL)
1074	return(-1);
1075	}
1076	}
1077
1078	/*
1079	* Fallback to HTML or ASCII when the encoding is unspecified
1080	*/
1081	if (handler == NULL)
1082	handler = xmlFindCharEncodingHandler("HTML");
1083	if (handler == NULL)
1084	handler = xmlFindCharEncodingHandler("ascii");
1085
1086	/*
1087	* save the content to a temp buffer.
1088	*/
1089	buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1090	if (buf == NULL) return(0);
1091
1092	htmlDocContentDumpOutput(buf, cur, NULL);
1093
1094	ret = xmlOutputBufferClose(buf);
1095	return(ret);
1096	}
1097
1098	/**
1099	* htmlSaveFileFormat:
1100	* @filename: the filename
1101	* @cur: the document
1102	* @format: should formatting spaces been added
1103	* @encoding: the document encoding
1104	*
1105	* Dump an HTML document to a file using a given encoding.
1106	*
1107	* returns: the number of byte written or -1 in case of failure.
1108	*/
1109	int
1110	htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1111	const char *encoding, int format) {
1112	xmlOutputBufferPtr buf;
1113	xmlCharEncodingHandlerPtr handler = NULL;
1114	int ret;
1115
1116	xmlInitParser();
1117
1118	if (encoding != NULL) {
1119	xmlCharEncoding enc;
1120
1121	enc = xmlParseCharEncoding(encoding);
1122	if (enc != cur->charset) {
1123	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1124	/*
1125	* Not supported yet
1126	*/
1127	return(-1);
1128	}
1129
1130	handler = xmlFindCharEncodingHandler(encoding);
1131	if (handler == NULL)
1132	return(-1);
1133	htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1134	}
1135	} else {
1136	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1137	}
1138
1139	/*
1140	* Fallback to HTML or ASCII when the encoding is unspecified
1141	*/
1142	if (handler == NULL)
1143	handler = xmlFindCharEncodingHandler("HTML");
1144	if (handler == NULL)
1145	handler = xmlFindCharEncodingHandler("ascii");
1146
1147	/*
1148	* save the content to a temp buffer.
1149	*/
1150	buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1151	if (buf == NULL) return(0);
1152
1153	htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1154
1155	ret = xmlOutputBufferClose(buf);
1156	return(ret);
1157	}
1158
1159	/**
1160	* htmlSaveFileEnc:
1161	* @filename: the filename
1162	* @cur: the document
1163	* @encoding: the document encoding
1164	*
1165	* Dump an HTML document to a file using a given encoding
1166	* and formatting returns/spaces are added.
1167	*
1168	* returns: the number of byte written or -1 in case of failure.
1169	*/
1170	int
1171	htmlSaveFileEnc(const char filename, xmlDocPtr cur, const char encoding) {
1172	return(htmlSaveFileFormat(filename, cur, encoding, 1));
1173	}
1174
1175	#endif /* LIBXML_OUTPUT_ENABLED */
1176
1177	#endif /* LIBXML_HTML_ENABLED */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: