1 | /* |
---|
2 | * testHTML.c : a small tester program for HTML input. |
---|
3 | * |
---|
4 | * See Copyright for the status of this software. |
---|
5 | * |
---|
6 | * daniel@veillard.com |
---|
7 | */ |
---|
8 | |
---|
9 | #include "libxml.h" |
---|
10 | |
---|
11 | #ifdef LIBXML_HTML_ENABLED |
---|
12 | |
---|
13 | #include <string.h> |
---|
14 | #include <stdarg.h> |
---|
15 | |
---|
16 | |
---|
17 | #ifdef HAVE_SYS_TYPES_H |
---|
18 | #include <sys/types.h> |
---|
19 | #endif |
---|
20 | #ifdef HAVE_SYS_STAT_H |
---|
21 | #include <sys/stat.h> |
---|
22 | #endif |
---|
23 | #ifdef HAVE_FCNTL_H |
---|
24 | #include <fcntl.h> |
---|
25 | #endif |
---|
26 | #ifdef HAVE_UNISTD_H |
---|
27 | #include <unistd.h> |
---|
28 | #endif |
---|
29 | #ifdef HAVE_STDLIB_H |
---|
30 | #include <stdlib.h> |
---|
31 | #endif |
---|
32 | |
---|
33 | #include <libxml/xmlmemory.h> |
---|
34 | #include <libxml/HTMLparser.h> |
---|
35 | #include <libxml/HTMLtree.h> |
---|
36 | #include <libxml/debugXML.h> |
---|
37 | #include <libxml/xmlerror.h> |
---|
38 | #include <libxml/globals.h> |
---|
39 | |
---|
/*
 * Command-line state. The counters are incremented by main() once per
 * matching switch and read by the parsing routines below.
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;           /* --debug */
#endif
static int copy = 0;            /* --copy */
static int sax = 0;             /* --sax */
static int repeat = 0;          /* --repeat */
static int noout = 0;           /* --noout */
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;            /* --push */
#endif /* LIBXML_PUSH_ENABLED */
static char *encoding = NULL;   /* argument of --encode, NULL if not given */
static int options = 0;         /* option mask handed to htmlReadFile() */
---|
52 | |
---|
53 | xmlSAXHandler emptySAXHandlerStruct = { |
---|
54 | NULL, /* internalSubset */ |
---|
55 | NULL, /* isStandalone */ |
---|
56 | NULL, /* hasInternalSubset */ |
---|
57 | NULL, /* hasExternalSubset */ |
---|
58 | NULL, /* resolveEntity */ |
---|
59 | NULL, /* getEntity */ |
---|
60 | NULL, /* entityDecl */ |
---|
61 | NULL, /* notationDecl */ |
---|
62 | NULL, /* attributeDecl */ |
---|
63 | NULL, /* elementDecl */ |
---|
64 | NULL, /* unparsedEntityDecl */ |
---|
65 | NULL, /* setDocumentLocator */ |
---|
66 | NULL, /* startDocument */ |
---|
67 | NULL, /* endDocument */ |
---|
68 | NULL, /* startElement */ |
---|
69 | NULL, /* endElement */ |
---|
70 | NULL, /* reference */ |
---|
71 | NULL, /* characters */ |
---|
72 | NULL, /* ignorableWhitespace */ |
---|
73 | NULL, /* processingInstruction */ |
---|
74 | NULL, /* comment */ |
---|
75 | NULL, /* xmlParserWarning */ |
---|
76 | NULL, /* xmlParserError */ |
---|
77 | NULL, /* xmlParserError */ |
---|
78 | NULL, /* getParameterEntity */ |
---|
79 | NULL, /* cdataBlock */ |
---|
80 | NULL, /* externalSubset */ |
---|
81 | 1, /* initialized */ |
---|
82 | NULL, /* private */ |
---|
83 | NULL, /* startElementNsSAX2Func */ |
---|
84 | NULL, /* endElementNsSAX2Func */ |
---|
85 | NULL /* xmlStructuredErrorFunc */ |
---|
86 | }; |
---|
87 | |
---|
88 | xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct; |
---|
89 | extern xmlSAXHandlerPtr debugSAXHandler; |
---|
90 | |
---|
91 | /************************************************************************ |
---|
92 | * * |
---|
93 | * Debug Handlers * |
---|
94 | * * |
---|
95 | ************************************************************************/ |
---|
96 | |
---|
97 | /** |
---|
98 | * isStandaloneDebug: |
---|
99 | * @ctxt: An XML parser context |
---|
100 | * |
---|
101 | * Is this document tagged standalone ? |
---|
102 | * |
---|
103 | * Returns 1 if true |
---|
104 | */ |
---|
105 | static int |
---|
106 | isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED) |
---|
107 | { |
---|
108 | fprintf(stdout, "SAX.isStandalone()\n"); |
---|
109 | return(0); |
---|
110 | } |
---|
111 | |
---|
112 | /** |
---|
113 | * hasInternalSubsetDebug: |
---|
114 | * @ctxt: An XML parser context |
---|
115 | * |
---|
116 | * Does this document has an internal subset |
---|
117 | * |
---|
118 | * Returns 1 if true |
---|
119 | */ |
---|
120 | static int |
---|
121 | hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED) |
---|
122 | { |
---|
123 | fprintf(stdout, "SAX.hasInternalSubset()\n"); |
---|
124 | return(0); |
---|
125 | } |
---|
126 | |
---|
127 | /** |
---|
128 | * hasExternalSubsetDebug: |
---|
129 | * @ctxt: An XML parser context |
---|
130 | * |
---|
131 | * Does this document has an external subset |
---|
132 | * |
---|
133 | * Returns 1 if true |
---|
134 | */ |
---|
135 | static int |
---|
136 | hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED) |
---|
137 | { |
---|
138 | fprintf(stdout, "SAX.hasExternalSubset()\n"); |
---|
139 | return(0); |
---|
140 | } |
---|
141 | |
---|
142 | /** |
---|
143 | * hasInternalSubsetDebug: |
---|
144 | * @ctxt: An XML parser context |
---|
145 | * |
---|
146 | * Does this document has an internal subset |
---|
147 | */ |
---|
148 | static void |
---|
149 | internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, |
---|
150 | const xmlChar *ExternalID, const xmlChar *SystemID) |
---|
151 | { |
---|
152 | fprintf(stdout, "SAX.internalSubset(%s,", name); |
---|
153 | if (ExternalID == NULL) |
---|
154 | fprintf(stdout, " ,"); |
---|
155 | else |
---|
156 | fprintf(stdout, " %s,", ExternalID); |
---|
157 | if (SystemID == NULL) |
---|
158 | fprintf(stdout, " )\n"); |
---|
159 | else |
---|
160 | fprintf(stdout, " %s)\n", SystemID); |
---|
161 | } |
---|
162 | |
---|
163 | /** |
---|
164 | * resolveEntityDebug: |
---|
165 | * @ctxt: An XML parser context |
---|
166 | * @publicId: The public ID of the entity |
---|
167 | * @systemId: The system ID of the entity |
---|
168 | * |
---|
169 | * Special entity resolver, better left to the parser, it has |
---|
170 | * more context than the application layer. |
---|
171 | * The default behaviour is to NOT resolve the entities, in that case |
---|
172 | * the ENTITY_REF nodes are built in the structure (and the parameter |
---|
173 | * values). |
---|
174 | * |
---|
175 | * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. |
---|
176 | */ |
---|
177 | static xmlParserInputPtr |
---|
178 | resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId) |
---|
179 | { |
---|
180 | /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ |
---|
181 | |
---|
182 | |
---|
183 | fprintf(stdout, "SAX.resolveEntity("); |
---|
184 | if (publicId != NULL) |
---|
185 | fprintf(stdout, "%s", (char *)publicId); |
---|
186 | else |
---|
187 | fprintf(stdout, " "); |
---|
188 | if (systemId != NULL) |
---|
189 | fprintf(stdout, ", %s)\n", (char *)systemId); |
---|
190 | else |
---|
191 | fprintf(stdout, ", )\n"); |
---|
192 | /********* |
---|
193 | if (systemId != NULL) { |
---|
194 | return(xmlNewInputFromFile(ctxt, (char *) systemId)); |
---|
195 | } |
---|
196 | *********/ |
---|
197 | return(NULL); |
---|
198 | } |
---|
199 | |
---|
200 | /** |
---|
201 | * getEntityDebug: |
---|
202 | * @ctxt: An XML parser context |
---|
203 | * @name: The entity name |
---|
204 | * |
---|
205 | * Get an entity by name |
---|
206 | * |
---|
207 | * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. |
---|
208 | */ |
---|
209 | static xmlEntityPtr |
---|
210 | getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) |
---|
211 | { |
---|
212 | fprintf(stdout, "SAX.getEntity(%s)\n", name); |
---|
213 | return(NULL); |
---|
214 | } |
---|
215 | |
---|
216 | /** |
---|
217 | * getParameterEntityDebug: |
---|
218 | * @ctxt: An XML parser context |
---|
219 | * @name: The entity name |
---|
220 | * |
---|
221 | * Get a parameter entity by name |
---|
222 | * |
---|
223 | * Returns the xmlParserInputPtr |
---|
224 | */ |
---|
225 | static xmlEntityPtr |
---|
226 | getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) |
---|
227 | { |
---|
228 | fprintf(stdout, "SAX.getParameterEntity(%s)\n", name); |
---|
229 | return(NULL); |
---|
230 | } |
---|
231 | |
---|
232 | |
---|
233 | /** |
---|
234 | * entityDeclDebug: |
---|
235 | * @ctxt: An XML parser context |
---|
236 | * @name: the entity name |
---|
237 | * @type: the entity type |
---|
238 | * @publicId: The public ID of the entity |
---|
239 | * @systemId: The system ID of the entity |
---|
240 | * @content: the entity value (without processing). |
---|
241 | * |
---|
242 | * An entity definition has been parsed |
---|
243 | */ |
---|
244 | static void |
---|
245 | entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type, |
---|
246 | const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) |
---|
247 | { |
---|
248 | fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", |
---|
249 | name, type, publicId, systemId, content); |
---|
250 | } |
---|
251 | |
---|
252 | /** |
---|
253 | * attributeDeclDebug: |
---|
254 | * @ctxt: An XML parser context |
---|
255 | * @name: the attribute name |
---|
256 | * @type: the attribute type |
---|
257 | * |
---|
258 | * An attribute definition has been parsed |
---|
259 | */ |
---|
260 | static void |
---|
261 | attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name, |
---|
262 | int type, int def, const xmlChar *defaultValue, |
---|
263 | xmlEnumerationPtr tree ATTRIBUTE_UNUSED) |
---|
264 | { |
---|
265 | fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", |
---|
266 | elem, name, type, def, defaultValue); |
---|
267 | } |
---|
268 | |
---|
269 | /** |
---|
270 | * elementDeclDebug: |
---|
271 | * @ctxt: An XML parser context |
---|
272 | * @name: the element name |
---|
273 | * @type: the element type |
---|
274 | * @content: the element value (without processing). |
---|
275 | * |
---|
276 | * An element definition has been parsed |
---|
277 | */ |
---|
278 | static void |
---|
279 | elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type, |
---|
280 | xmlElementContentPtr content ATTRIBUTE_UNUSED) |
---|
281 | { |
---|
282 | fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n", |
---|
283 | name, type); |
---|
284 | } |
---|
285 | |
---|
286 | /** |
---|
287 | * notationDeclDebug: |
---|
288 | * @ctxt: An XML parser context |
---|
289 | * @name: The name of the notation |
---|
290 | * @publicId: The public ID of the entity |
---|
291 | * @systemId: The system ID of the entity |
---|
292 | * |
---|
293 | * What to do when a notation declaration has been parsed. |
---|
294 | */ |
---|
295 | static void |
---|
296 | notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, |
---|
297 | const xmlChar *publicId, const xmlChar *systemId) |
---|
298 | { |
---|
299 | fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n", |
---|
300 | (char *) name, (char *) publicId, (char *) systemId); |
---|
301 | } |
---|
302 | |
---|
303 | /** |
---|
304 | * unparsedEntityDeclDebug: |
---|
305 | * @ctxt: An XML parser context |
---|
306 | * @name: The name of the entity |
---|
307 | * @publicId: The public ID of the entity |
---|
308 | * @systemId: The system ID of the entity |
---|
309 | * @notationName: the name of the notation |
---|
310 | * |
---|
311 | * What to do when an unparsed entity declaration is parsed |
---|
312 | */ |
---|
313 | static void |
---|
314 | unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, |
---|
315 | const xmlChar *publicId, const xmlChar *systemId, |
---|
316 | const xmlChar *notationName) |
---|
317 | { |
---|
318 | fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", |
---|
319 | (char *) name, (char *) publicId, (char *) systemId, |
---|
320 | (char *) notationName); |
---|
321 | } |
---|
322 | |
---|
323 | /** |
---|
324 | * setDocumentLocatorDebug: |
---|
325 | * @ctxt: An XML parser context |
---|
326 | * @loc: A SAX Locator |
---|
327 | * |
---|
328 | * Receive the document locator at startup, actually xmlDefaultSAXLocator |
---|
329 | * Everything is available on the context, so this is useless in our case. |
---|
330 | */ |
---|
331 | static void |
---|
332 | setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED) |
---|
333 | { |
---|
334 | fprintf(stdout, "SAX.setDocumentLocator()\n"); |
---|
335 | } |
---|
336 | |
---|
337 | /** |
---|
338 | * startDocumentDebug: |
---|
339 | * @ctxt: An XML parser context |
---|
340 | * |
---|
341 | * called when the document start being processed. |
---|
342 | */ |
---|
343 | static void |
---|
344 | startDocumentDebug(void *ctx ATTRIBUTE_UNUSED) |
---|
345 | { |
---|
346 | fprintf(stdout, "SAX.startDocument()\n"); |
---|
347 | } |
---|
348 | |
---|
349 | /** |
---|
350 | * endDocumentDebug: |
---|
351 | * @ctxt: An XML parser context |
---|
352 | * |
---|
353 | * called when the document end has been detected. |
---|
354 | */ |
---|
355 | static void |
---|
356 | endDocumentDebug(void *ctx ATTRIBUTE_UNUSED) |
---|
357 | { |
---|
358 | fprintf(stdout, "SAX.endDocument()\n"); |
---|
359 | } |
---|
360 | |
---|
361 | /** |
---|
362 | * startElementDebug: |
---|
363 | * @ctxt: An XML parser context |
---|
364 | * @name: The element name |
---|
365 | * |
---|
366 | * called when an opening tag has been processed. |
---|
367 | */ |
---|
368 | static void |
---|
369 | startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts) |
---|
370 | { |
---|
371 | int i; |
---|
372 | |
---|
373 | fprintf(stdout, "SAX.startElement(%s", (char *) name); |
---|
374 | if (atts != NULL) { |
---|
375 | for (i = 0;(atts[i] != NULL);i++) { |
---|
376 | fprintf(stdout, ", %s", atts[i++]); |
---|
377 | if (atts[i] != NULL) { |
---|
378 | unsigned char output[40]; |
---|
379 | const unsigned char *att = atts[i]; |
---|
380 | int outlen, attlen; |
---|
381 | fprintf(stdout, "='"); |
---|
382 | while ((attlen = strlen((char*)att)) > 0) { |
---|
383 | outlen = sizeof output - 1; |
---|
384 | htmlEncodeEntities(output, &outlen, att, &attlen, '\''); |
---|
385 | output[outlen] = 0; |
---|
386 | fprintf(stdout, "%s", (char *) output); |
---|
387 | att += attlen; |
---|
388 | } |
---|
389 | fprintf(stdout, "'"); |
---|
390 | } |
---|
391 | } |
---|
392 | } |
---|
393 | fprintf(stdout, ")\n"); |
---|
394 | } |
---|
395 | |
---|
396 | /** |
---|
397 | * endElementDebug: |
---|
398 | * @ctxt: An XML parser context |
---|
399 | * @name: The element name |
---|
400 | * |
---|
401 | * called when the end of an element has been detected. |
---|
402 | */ |
---|
403 | static void |
---|
404 | endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) |
---|
405 | { |
---|
406 | fprintf(stdout, "SAX.endElement(%s)\n", (char *) name); |
---|
407 | } |
---|
408 | |
---|
409 | /** |
---|
410 | * charactersDebug: |
---|
411 | * @ctxt: An XML parser context |
---|
412 | * @ch: a xmlChar string |
---|
413 | * @len: the number of xmlChar |
---|
414 | * |
---|
415 | * receiving some chars from the parser. |
---|
416 | * Question: how much at a time ??? |
---|
417 | */ |
---|
418 | static void |
---|
419 | charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) |
---|
420 | { |
---|
421 | unsigned char output[40]; |
---|
422 | int inlen = len, outlen = 30; |
---|
423 | |
---|
424 | htmlEncodeEntities(output, &outlen, ch, &inlen, 0); |
---|
425 | output[outlen] = 0; |
---|
426 | |
---|
427 | fprintf(stdout, "SAX.characters(%s, %d)\n", output, len); |
---|
428 | } |
---|
429 | |
---|
430 | /** |
---|
431 | * cdataDebug: |
---|
432 | * @ctxt: An XML parser context |
---|
433 | * @ch: a xmlChar string |
---|
434 | * @len: the number of xmlChar |
---|
435 | * |
---|
436 | * receiving some cdata chars from the parser. |
---|
437 | * Question: how much at a time ??? |
---|
438 | */ |
---|
439 | static void |
---|
440 | cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) |
---|
441 | { |
---|
442 | unsigned char output[40]; |
---|
443 | int inlen = len, outlen = 30; |
---|
444 | |
---|
445 | htmlEncodeEntities(output, &outlen, ch, &inlen, 0); |
---|
446 | output[outlen] = 0; |
---|
447 | |
---|
448 | fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len); |
---|
449 | } |
---|
450 | |
---|
451 | /** |
---|
452 | * referenceDebug: |
---|
453 | * @ctxt: An XML parser context |
---|
454 | * @name: The entity name |
---|
455 | * |
---|
456 | * called when an entity reference is detected. |
---|
457 | */ |
---|
458 | static void |
---|
459 | referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name) |
---|
460 | { |
---|
461 | fprintf(stdout, "SAX.reference(%s)\n", name); |
---|
462 | } |
---|
463 | |
---|
464 | /** |
---|
465 | * ignorableWhitespaceDebug: |
---|
466 | * @ctxt: An XML parser context |
---|
467 | * @ch: a xmlChar string |
---|
468 | * @start: the first char in the string |
---|
469 | * @len: the number of xmlChar |
---|
470 | * |
---|
471 | * receiving some ignorable whitespaces from the parser. |
---|
472 | * Question: how much at a time ??? |
---|
473 | */ |
---|
474 | static void |
---|
475 | ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len) |
---|
476 | { |
---|
477 | char output[40]; |
---|
478 | int i; |
---|
479 | |
---|
480 | for (i = 0;(i<len) && (i < 30);i++) |
---|
481 | output[i] = ch[i]; |
---|
482 | output[i] = 0; |
---|
483 | |
---|
484 | fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len); |
---|
485 | } |
---|
486 | |
---|
487 | /** |
---|
488 | * processingInstructionDebug: |
---|
489 | * @ctxt: An XML parser context |
---|
490 | * @target: the target name |
---|
491 | * @data: the PI data's |
---|
492 | * @len: the number of xmlChar |
---|
493 | * |
---|
494 | * A processing instruction has been parsed. |
---|
495 | */ |
---|
496 | static void |
---|
497 | processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target, |
---|
498 | const xmlChar *data) |
---|
499 | { |
---|
500 | fprintf(stdout, "SAX.processingInstruction(%s, %s)\n", |
---|
501 | (char *) target, (char *) data); |
---|
502 | } |
---|
503 | |
---|
504 | /** |
---|
505 | * commentDebug: |
---|
506 | * @ctxt: An XML parser context |
---|
507 | * @value: the comment content |
---|
508 | * |
---|
509 | * A comment has been parsed. |
---|
510 | */ |
---|
511 | static void |
---|
512 | commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value) |
---|
513 | { |
---|
514 | fprintf(stdout, "SAX.comment(%s)\n", value); |
---|
515 | } |
---|
516 | |
---|
517 | /** |
---|
518 | * warningDebug: |
---|
519 | * @ctxt: An XML parser context |
---|
520 | * @msg: the message to display/transmit |
---|
521 | * @...: extra parameters for the message display |
---|
522 | * |
---|
523 | * Display and format a warning messages, gives file, line, position and |
---|
524 | * extra parameters. |
---|
525 | */ |
---|
526 | static void |
---|
527 | warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) |
---|
528 | { |
---|
529 | va_list args; |
---|
530 | |
---|
531 | va_start(args, msg); |
---|
532 | fprintf(stdout, "SAX.warning: "); |
---|
533 | vfprintf(stdout, msg, args); |
---|
534 | va_end(args); |
---|
535 | } |
---|
536 | |
---|
537 | /** |
---|
538 | * errorDebug: |
---|
539 | * @ctxt: An XML parser context |
---|
540 | * @msg: the message to display/transmit |
---|
541 | * @...: extra parameters for the message display |
---|
542 | * |
---|
543 | * Display and format a error messages, gives file, line, position and |
---|
544 | * extra parameters. |
---|
545 | */ |
---|
546 | static void |
---|
547 | errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) |
---|
548 | { |
---|
549 | va_list args; |
---|
550 | |
---|
551 | va_start(args, msg); |
---|
552 | fprintf(stdout, "SAX.error: "); |
---|
553 | vfprintf(stdout, msg, args); |
---|
554 | va_end(args); |
---|
555 | } |
---|
556 | |
---|
557 | /** |
---|
558 | * fatalErrorDebug: |
---|
559 | * @ctxt: An XML parser context |
---|
560 | * @msg: the message to display/transmit |
---|
561 | * @...: extra parameters for the message display |
---|
562 | * |
---|
563 | * Display and format a fatalError messages, gives file, line, position and |
---|
564 | * extra parameters. |
---|
565 | */ |
---|
566 | static void |
---|
567 | fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) |
---|
568 | { |
---|
569 | va_list args; |
---|
570 | |
---|
571 | va_start(args, msg); |
---|
572 | fprintf(stdout, "SAX.fatalError: "); |
---|
573 | vfprintf(stdout, msg, args); |
---|
574 | va_end(args); |
---|
575 | } |
---|
576 | |
---|
577 | xmlSAXHandler debugSAXHandlerStruct = { |
---|
578 | internalSubsetDebug, |
---|
579 | isStandaloneDebug, |
---|
580 | hasInternalSubsetDebug, |
---|
581 | hasExternalSubsetDebug, |
---|
582 | resolveEntityDebug, |
---|
583 | getEntityDebug, |
---|
584 | entityDeclDebug, |
---|
585 | notationDeclDebug, |
---|
586 | attributeDeclDebug, |
---|
587 | elementDeclDebug, |
---|
588 | unparsedEntityDeclDebug, |
---|
589 | setDocumentLocatorDebug, |
---|
590 | startDocumentDebug, |
---|
591 | endDocumentDebug, |
---|
592 | startElementDebug, |
---|
593 | endElementDebug, |
---|
594 | referenceDebug, |
---|
595 | charactersDebug, |
---|
596 | ignorableWhitespaceDebug, |
---|
597 | processingInstructionDebug, |
---|
598 | commentDebug, |
---|
599 | warningDebug, |
---|
600 | errorDebug, |
---|
601 | fatalErrorDebug, |
---|
602 | getParameterEntityDebug, |
---|
603 | cdataDebug, |
---|
604 | NULL, |
---|
605 | 1, |
---|
606 | NULL, |
---|
607 | NULL, |
---|
608 | NULL, |
---|
609 | NULL |
---|
610 | }; |
---|
611 | |
---|
612 | xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; |
---|
613 | /************************************************************************ |
---|
614 | * * |
---|
615 | * Debug * |
---|
616 | * * |
---|
617 | ************************************************************************/ |
---|
618 | |
---|
619 | static void |
---|
620 | parseSAXFile(char *filename) { |
---|
621 | htmlDocPtr doc = NULL; |
---|
622 | |
---|
623 | /* |
---|
624 | * Empty callbacks for checking |
---|
625 | */ |
---|
626 | #ifdef LIBXML_PUSH_ENABLED |
---|
627 | if (push) { |
---|
628 | FILE *f; |
---|
629 | |
---|
630 | #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__) |
---|
631 | f = fopen(filename, "rb"); |
---|
632 | #else |
---|
633 | f = fopen(filename, "r"); |
---|
634 | #endif |
---|
635 | if (f != NULL) { |
---|
636 | int res, size = 3; |
---|
637 | char chars[4096]; |
---|
638 | htmlParserCtxtPtr ctxt; |
---|
639 | |
---|
640 | /* if (repeat) */ |
---|
641 | size = 4096; |
---|
642 | res = fread(chars, 1, 4, f); |
---|
643 | if (res > 0) { |
---|
644 | ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL, |
---|
645 | chars, res, filename, XML_CHAR_ENCODING_NONE); |
---|
646 | while ((res = fread(chars, 1, size, f)) > 0) { |
---|
647 | htmlParseChunk(ctxt, chars, res, 0); |
---|
648 | } |
---|
649 | htmlParseChunk(ctxt, chars, 0, 1); |
---|
650 | doc = ctxt->myDoc; |
---|
651 | htmlFreeParserCtxt(ctxt); |
---|
652 | } |
---|
653 | if (doc != NULL) { |
---|
654 | fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); |
---|
655 | xmlFreeDoc(doc); |
---|
656 | } |
---|
657 | fclose(f); |
---|
658 | } |
---|
659 | if (!noout) { |
---|
660 | #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__) |
---|
661 | f = fopen(filename, "rb"); |
---|
662 | #else |
---|
663 | f = fopen(filename, "r"); |
---|
664 | #endif |
---|
665 | if (f != NULL) { |
---|
666 | int res, size = 3; |
---|
667 | char chars[4096]; |
---|
668 | htmlParserCtxtPtr ctxt; |
---|
669 | |
---|
670 | /* if (repeat) */ |
---|
671 | size = 4096; |
---|
672 | res = fread(chars, 1, 4, f); |
---|
673 | if (res > 0) { |
---|
674 | ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL, |
---|
675 | chars, res, filename, XML_CHAR_ENCODING_NONE); |
---|
676 | while ((res = fread(chars, 1, size, f)) > 0) { |
---|
677 | htmlParseChunk(ctxt, chars, res, 0); |
---|
678 | } |
---|
679 | htmlParseChunk(ctxt, chars, 0, 1); |
---|
680 | doc = ctxt->myDoc; |
---|
681 | htmlFreeParserCtxt(ctxt); |
---|
682 | } |
---|
683 | if (doc != NULL) { |
---|
684 | fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); |
---|
685 | xmlFreeDoc(doc); |
---|
686 | } |
---|
687 | fclose(f); |
---|
688 | } |
---|
689 | } |
---|
690 | } else { |
---|
691 | #endif /* LIBXML_PUSH_ENABLED */ |
---|
692 | doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL); |
---|
693 | if (doc != NULL) { |
---|
694 | fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); |
---|
695 | xmlFreeDoc(doc); |
---|
696 | } |
---|
697 | |
---|
698 | if (!noout) { |
---|
699 | /* |
---|
700 | * Debug callback |
---|
701 | */ |
---|
702 | doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL); |
---|
703 | if (doc != NULL) { |
---|
704 | fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); |
---|
705 | xmlFreeDoc(doc); |
---|
706 | } |
---|
707 | } |
---|
708 | #ifdef LIBXML_PUSH_ENABLED |
---|
709 | } |
---|
710 | #endif /* LIBXML_PUSH_ENABLED */ |
---|
711 | } |
---|
712 | |
---|
713 | static void |
---|
714 | parseAndPrintFile(char *filename) { |
---|
715 | htmlDocPtr doc = NULL; |
---|
716 | |
---|
717 | /* |
---|
718 | * build an HTML tree from a string; |
---|
719 | */ |
---|
720 | #ifdef LIBXML_PUSH_ENABLED |
---|
721 | if (push) { |
---|
722 | FILE *f; |
---|
723 | |
---|
724 | #if defined(_WIN32) || defined (__DJGPP__) && !defined (__CYGWIN__) |
---|
725 | f = fopen(filename, "rb"); |
---|
726 | #else |
---|
727 | f = fopen(filename, "r"); |
---|
728 | #endif |
---|
729 | if (f != NULL) { |
---|
730 | int res, size = 3; |
---|
731 | char chars[4096]; |
---|
732 | htmlParserCtxtPtr ctxt; |
---|
733 | |
---|
734 | /* if (repeat) */ |
---|
735 | size = 4096; |
---|
736 | res = fread(chars, 1, 4, f); |
---|
737 | if (res > 0) { |
---|
738 | ctxt = htmlCreatePushParserCtxt(NULL, NULL, |
---|
739 | chars, res, filename, XML_CHAR_ENCODING_NONE); |
---|
740 | while ((res = fread(chars, 1, size, f)) > 0) { |
---|
741 | htmlParseChunk(ctxt, chars, res, 0); |
---|
742 | } |
---|
743 | htmlParseChunk(ctxt, chars, 0, 1); |
---|
744 | doc = ctxt->myDoc; |
---|
745 | htmlFreeParserCtxt(ctxt); |
---|
746 | } |
---|
747 | fclose(f); |
---|
748 | } |
---|
749 | } else { |
---|
750 | doc = htmlReadFile(filename, NULL, options); |
---|
751 | } |
---|
752 | #else |
---|
753 | doc = htmlReadFile(filename,NULL,options); |
---|
754 | #endif |
---|
755 | if (doc == NULL) { |
---|
756 | xmlGenericError(xmlGenericErrorContext, |
---|
757 | "Could not parse %s\n", filename); |
---|
758 | } |
---|
759 | |
---|
760 | #ifdef LIBXML_TREE_ENABLED |
---|
761 | /* |
---|
762 | * test intermediate copy if needed. |
---|
763 | */ |
---|
764 | if (copy) { |
---|
765 | htmlDocPtr tmp; |
---|
766 | |
---|
767 | tmp = doc; |
---|
768 | doc = xmlCopyDoc(doc, 1); |
---|
769 | xmlFreeDoc(tmp); |
---|
770 | } |
---|
771 | #endif |
---|
772 | |
---|
773 | #ifdef LIBXML_OUTPUT_ENABLED |
---|
774 | /* |
---|
775 | * print it. |
---|
776 | */ |
---|
777 | if (!noout) { |
---|
778 | #ifdef LIBXML_DEBUG_ENABLED |
---|
779 | if (!debug) { |
---|
780 | if (encoding) |
---|
781 | htmlSaveFileEnc("-", doc, encoding); |
---|
782 | else |
---|
783 | htmlDocDump(stdout, doc); |
---|
784 | } else |
---|
785 | xmlDebugDumpDocument(stdout, doc); |
---|
786 | #else |
---|
787 | if (encoding) |
---|
788 | htmlSaveFileEnc("-", doc, encoding); |
---|
789 | else |
---|
790 | htmlDocDump(stdout, doc); |
---|
791 | #endif |
---|
792 | } |
---|
793 | #endif /* LIBXML_OUTPUT_ENABLED */ |
---|
794 | |
---|
795 | /* |
---|
796 | * free it. |
---|
797 | */ |
---|
798 | xmlFreeDoc(doc); |
---|
799 | } |
---|
800 | |
---|
801 | int main(int argc, char **argv) { |
---|
802 | int i, count; |
---|
803 | int files = 0; |
---|
804 | |
---|
805 | for (i = 1; i < argc ; i++) { |
---|
806 | #ifdef LIBXML_DEBUG_ENABLED |
---|
807 | if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) |
---|
808 | debug++; |
---|
809 | else |
---|
810 | #endif |
---|
811 | if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy"))) |
---|
812 | copy++; |
---|
813 | #ifdef LIBXML_PUSH_ENABLED |
---|
814 | else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push"))) |
---|
815 | push++; |
---|
816 | #endif /* LIBXML_PUSH_ENABLED */ |
---|
817 | else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax"))) |
---|
818 | sax++; |
---|
819 | else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout"))) |
---|
820 | noout++; |
---|
821 | else if ((!strcmp(argv[i], "-repeat")) || |
---|
822 | (!strcmp(argv[i], "--repeat"))) |
---|
823 | repeat++; |
---|
824 | else if ((!strcmp(argv[i], "-encode")) || |
---|
825 | (!strcmp(argv[i], "--encode"))) { |
---|
826 | i++; |
---|
827 | encoding = argv[i]; |
---|
828 | } |
---|
829 | } |
---|
830 | for (i = 1; i < argc ; i++) { |
---|
831 | if ((!strcmp(argv[i], "-encode")) || |
---|
832 | (!strcmp(argv[i], "--encode"))) { |
---|
833 | i++; |
---|
834 | continue; |
---|
835 | } |
---|
836 | if (argv[i][0] != '-') { |
---|
837 | if (repeat) { |
---|
838 | for (count = 0;count < 100 * repeat;count++) { |
---|
839 | if (sax) |
---|
840 | parseSAXFile(argv[i]); |
---|
841 | else |
---|
842 | parseAndPrintFile(argv[i]); |
---|
843 | } |
---|
844 | } else { |
---|
845 | if (sax) |
---|
846 | parseSAXFile(argv[i]); |
---|
847 | else |
---|
848 | parseAndPrintFile(argv[i]); |
---|
849 | } |
---|
850 | files ++; |
---|
851 | } |
---|
852 | } |
---|
853 | if (files == 0) { |
---|
854 | printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n", |
---|
855 | argv[0]); |
---|
856 | printf("\tParse the HTML files and output the result of the parsing\n"); |
---|
857 | #ifdef LIBXML_DEBUG_ENABLED |
---|
858 | printf("\t--debug : dump a debug tree of the in-memory document\n"); |
---|
859 | #endif |
---|
860 | printf("\t--copy : used to test the internal copy implementation\n"); |
---|
861 | printf("\t--sax : debug the sequence of SAX callbacks\n"); |
---|
862 | printf("\t--repeat : parse the file 100 times, for timing\n"); |
---|
863 | printf("\t--noout : do not print the result\n"); |
---|
864 | #ifdef LIBXML_PUSH_ENABLED |
---|
865 | printf("\t--push : use the push mode parser\n"); |
---|
866 | #endif /* LIBXML_PUSH_ENABLED */ |
---|
867 | printf("\t--encode encoding : output in the given encoding\n"); |
---|
868 | } |
---|
869 | xmlCleanupParser(); |
---|
870 | xmlMemoryDump(); |
---|
871 | |
---|
872 | return(0); |
---|
873 | } |
---|
874 | #else /* !LIBXML_HTML_ENABLED */ |
---|
875 | #include <stdio.h> |
---|
876 | int main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { |
---|
877 | printf("%s : HTML support not compiled in\n", argv[0]); |
---|
878 | return(0); |
---|
879 | } |
---|
880 | #endif |
---|