1 | /* |
---|
2 | * HTMLparser.h : interface for an HTML 4.0 non-verifying parser |
---|
3 | * |
---|
4 | * See Copyright for the status of this software. |
---|
5 | * |
---|
6 | * Daniel.Veillard@w3.org |
---|
7 | */ |
---|
8 | |
---|
9 | #ifndef __HTML_PARSER_H__ |
---|
10 | #define __HTML_PARSER_H__ |
---|
11 | #include "parser.h" |
---|
12 | |
---|
13 | #ifdef __cplusplus |
---|
14 | extern "C" { |
---|
15 | #endif |
---|
16 | |
---|
17 | /* |
---|
18 | * Most of the back-end structures from XML and HTML are shared |
---|
19 | */ |
---|
20 | typedef xmlParserCtxt htmlParserCtxt; |
---|
21 | typedef xmlParserCtxtPtr htmlParserCtxtPtr; |
---|
22 | typedef xmlParserNodeInfo htmlParserNodeInfo; |
---|
23 | typedef xmlSAXHandler htmlSAXHandler; |
---|
24 | typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; |
---|
25 | typedef xmlParserInput htmlParserInput; |
---|
26 | typedef xmlParserInputPtr htmlParserInputPtr; |
---|
27 | typedef xmlDocPtr htmlDocPtr; |
---|
28 | typedef xmlNodePtr htmlNodePtr; |
---|
29 | |
---|
/*
 * Internal description of an HTML element.
 *
 * Fields:
 *   name      the tag name
 *   startTag  whether the start tag can be implied
 *   endTag    whether the end tag can be implied
 *   empty     whether this is an empty element
 *   depr      whether this is a deprecated element
 *   dtd       1: only in the Loose DTD, 2: only in the Frameset one
 *   desc      the description
 */
struct _htmlElemDesc {
    const char *name;
    int startTag;
    int endTag;
    int empty;
    int depr;
    int dtd;
    const char *desc;
};
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
---|
44 | |
---|
/*
 * Internal description of an HTML entity.
 *
 * Fields:
 *   value  the UNICODE value for the character
 *   name   the entity name
 *   desc   the description
 */
struct _htmlEntityDesc {
    int value;
    const char *name;
    const char *desc;
};
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
---|
55 | |
---|
56 | /* |
---|
57 | * There is only few public functions. |
---|
58 | */ |
---|
59 | htmlElemDescPtr htmlTagLookup (const xmlChar *tag); |
---|
60 | htmlEntityDescPtr htmlEntityLookup(const xmlChar *name); |
---|
61 | |
---|
62 | int htmlIsAutoClosed(htmlDocPtr doc, |
---|
63 | htmlNodePtr elem); |
---|
64 | int htmlAutoCloseTag(htmlDocPtr doc, |
---|
65 | const xmlChar *name, |
---|
66 | htmlNodePtr elem); |
---|
67 | htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, |
---|
68 | xmlChar **str); |
---|
69 | int htmlParseCharRef(htmlParserCtxtPtr ctxt); |
---|
70 | void htmlParseElement(htmlParserCtxtPtr ctxt); |
---|
71 | |
---|
72 | htmlDocPtr htmlSAXParseDoc (xmlChar *cur, |
---|
73 | const char *encoding, |
---|
74 | htmlSAXHandlerPtr sax, |
---|
75 | void *userData); |
---|
76 | htmlDocPtr htmlParseDoc (xmlChar *cur, |
---|
77 | const char *encoding); |
---|
78 | htmlDocPtr htmlSAXParseFile(const char *filename, |
---|
79 | const char *encoding, |
---|
80 | htmlSAXHandlerPtr sax, |
---|
81 | void *userData); |
---|
82 | htmlDocPtr htmlParseFile (const char *filename, |
---|
83 | const char *encoding); |
---|
84 | |
---|
85 | /** |
---|
86 | * Interfaces for the Push mode |
---|
87 | */ |
---|
88 | void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); |
---|
89 | htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, |
---|
90 | void *user_data, |
---|
91 | const char *chunk, |
---|
92 | int size, |
---|
93 | const char *filename, |
---|
94 | xmlCharEncoding enc); |
---|
95 | int htmlParseChunk (htmlParserCtxtPtr ctxt, |
---|
96 | const char *chunk, |
---|
97 | int size, |
---|
98 | int terminate); |
---|
99 | #ifdef __cplusplus |
---|
100 | } |
---|
101 | #endif |
---|
102 | |
---|
103 | #endif /* __HTML_PARSER_H__ */ |
---|