source: trunk/third/mozilla/htmlparser/public/nsHTMLTokens.h @ 20551

Revision 20551, 16.7 KB checked in by rbasch, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20550, which included commits to RCS files with non-trunk default branches.
Line 
1
2/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
3/*
4 * The contents of this file are subject to the Netscape Public
5 * License Version 1.1 (the "License"); you may not use this file
6 * except in compliance with the License. You may obtain a copy of
7 * the License at http://www.mozilla.org/NPL/
8 *
9 * Software distributed under the License is distributed on an "AS
10 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 * implied. See the License for the specific language governing
12 * rights and limitations under the License.
13 *
14 * The Original Code is mozilla.org code.
15 *
16 * The Initial Developer of the Original Code is Netscape
17 * Communications Corporation.  Portions created by Netscape are
18 * Copyright (C) 1998 Netscape Communications Corporation. All
19 * Rights Reserved.
20 *
21 * Contributor(s):
22 */
23
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 * This file contains the declarations for all the HTML-specific token types that
 * our DTDs understand. In fact, the same set of token types is used for XML.
 * Currently we have tokens for text, comments, start and end tags, entities,
 * attributes, style, script and skipped content. Whitespace and newlines also
 * have their own token types, but don't count on them to stay forever.
 *
 * If you're looking for the html tags, they're in a file called nsHTMLTags.h/cpp.
 *
 * Most of the token types have a similar API. They have methods to get the type
 * of token (GetTokenType); those that represent HTML tags also have a method to
 * get the tag type (GetTypeID). In addition, most have a method, called Consume,
 * that lets the token take part in the parsing process. We've also thrown in a
 * few standard debugging methods.
 */
42
43#ifndef HTMLTOKENS_H
44#define HTMLTOKENS_H
45
46#include "nsToken.h"
47#include "nsHTMLTags.h"
48#include "nsParserError.h"
49#include "nsString.h"
50#include "nsScannerString.h"
51
52class nsScanner;
53
/*******************************************************************
 * This enum defines the set of token types that we currently support.
 *
 * Values are assigned sequentially: eToken_unknown is 0, eToken_start is 1,
 * and each following enumerator is one greater than its predecessor, so
 * eToken_markupDecl is 16 and the eToken_last sentinel is 17.
 *******************************************************************/

enum eHTMLTokenTypes {
  eToken_unknown=0,
  eToken_start=1,      eToken_end,     eToken_comment,         eToken_entity,
  eToken_whitespace,   eToken_newline, eToken_text,            eToken_attribute,
  eToken_script,       eToken_style,   eToken_skippedcontent,  eToken_instruction,
  eToken_cdatasection, eToken_error,   eToken_doctypeDecl,     eToken_markupDecl,
  eToken_last //make sure this stays the last token...
};
66
/**
 *  Coarse categories for HTML content.
 *
 *  Values are assigned sequentially starting from eHTMLCategory_unknown (0)
 *  through eHTMLCategory_text (15). Nothing in this header consumes the enum;
 *  its users live elsewhere.
 */
enum eHTMLCategory {
  eHTMLCategory_unknown=0,
  eHTMLCategory_inline,
  eHTMLCategory_block,
  eHTMLCategory_blockAndInline,
  eHTMLCategory_list,
  eHTMLCategory_table,
  eHTMLCategory_tablepart,
  eHTMLCategory_tablerow,
  eHTMLCategory_tabledata,
  eHTMLCategory_head,
  eHTMLCategory_html,
  eHTMLCategory_body,
  eHTMLCategory_form,
  eHTMLCategory_options,
  eHTMLCategory_frameset,
  eHTMLCategory_text
};
85
86
87nsresult      ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
88nsresult      ConsumeAttributeText(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
89const PRUnichar* GetTagName(PRInt32 aTag);
90//PRInt32     FindEntityIndex(nsString& aString,PRInt32 aCount=-1);
91
92
93
94/**
95 *  This declares the basic token type used in the HTML DTD's.
96 *  @update  gess 3/25/98
97 */
98class CHTMLToken : public CToken {
99public:
100    virtual             ~CHTMLToken();
101
102                        CHTMLToken(eHTMLTags aTag);
103
104    virtual eContainerInfo GetContainerInfo(void) const {return eFormUnknown;}
105    virtual void           SetContainerInfo(eContainerInfo aInfo) { }
106
107protected:
108};
109
110/**
111 *  This declares start tokens, which always take the form <xxxx>.
112 *      This class also knows how to consume related attributes.
113 * 
114 *  @update  gess 3/25/98
115 */
116class CStartToken: public CHTMLToken {
117  CTOKEN_IMPL_SIZEOF
118
119  public:
120                          CStartToken(eHTMLTags aTag=eHTMLTag_unknown);
121                          CStartToken(const nsAString& aString);
122                          CStartToken(const nsAString& aName,eHTMLTags aTag);
123
124    virtual nsresult      Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
125    virtual PRInt32       GetTypeID(void);
126    virtual const char*   GetClassName(void);
127    virtual PRInt32       GetTokenType(void);
128
129    virtual PRBool        IsEmpty(void);
130    virtual void          SetEmpty(PRBool aValue);
131
132    virtual const nsAString& GetStringValue();
133    virtual void          GetSource(nsString& anOutputString);
134    virtual void          AppendSourceTo(nsAString& anOutputString);
135
136      //the following info is used to set well-formedness state on start tags...
137    virtual eContainerInfo GetContainerInfo(void) const {return mContainerInfo;}
138    virtual void           SetContainerInfo(eContainerInfo aContainerInfo) {mContainerInfo=aContainerInfo;}
139    virtual PRBool         IsWellFormed(void) const {return PRBool(eWellFormed==mContainerInfo);}
140
141
142    /*
143     * Get and set the ID attribute atom for this element. 
144     * See http://www.w3.org/TR/1998/REC-xml-19980210#sec-attribute-types
145     * for the definition of an ID attribute.
146     *
147     */
148    virtual nsresult      GetIDAttributeAtom(nsIAtom** aResult);
149    virtual nsresult      SetIDAttributeAtom(nsIAtom* aID);
150 
151            nsString          mTextValue;
152            nsString          mTrailingContent;
153  protected:   
154            eContainerInfo    mContainerInfo;
155            nsCOMPtr<nsIAtom> mIDAttributeAtom;
156            PRPackedBool      mEmpty; 
157#ifdef DEBUG
158            PRPackedBool      mAttributed;
159#endif
160};
161
162
163/**
164 *  This declares end tokens, which always take the
165 *  form </xxxx>. This class also knows how to consume
166 *  related attributes.
167 * 
168 *  @update  gess 3/25/98
169 */
170class CEndToken: public CHTMLToken {
171  CTOKEN_IMPL_SIZEOF
172
173  public:
174                        CEndToken(eHTMLTags aTag);
175                        CEndToken(const nsAString& aString);
176                        CEndToken(const nsAString& aName,eHTMLTags aTag);
177    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
178    virtual PRInt32     GetTypeID(void);
179    virtual const char* GetClassName(void);
180    virtual PRInt32     GetTokenType(void);
181
182    virtual const nsAString& GetStringValue();
183    virtual void        GetSource(nsString& anOutputString);
184    virtual void        AppendSourceTo(nsAString& anOutputString);
185
186  protected:
187    nsString          mTextValue;
188};
189
190
191/**
192 *  This declares comment tokens. Comments are usually
193 *  thought of as tokens, but we treat them that way
194 *  here so that the parser can have a consistent view
195 *  of all tokens.
196 * 
197 *  @update  gess 3/25/98
198 */
199class CCommentToken: public CHTMLToken {
200  CTOKEN_IMPL_SIZEOF
201
202  public:
203                        CCommentToken();
204                        CCommentToken(const nsAString& aString);
205    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
206    virtual const char* GetClassName(void);
207    virtual PRInt32     GetTokenType(void);
208    virtual const nsAString& GetStringValue(void);
209    virtual void        AppendSourceTo(nsAString& anOutputString);
210
211    nsresult ConsumeStrictComment(nsScanner& aScanner);
212    nsresult ConsumeQuirksComment(nsScanner& aScanner);
213
214  protected:
215    nsScannerSubstring mComment; // does not include MDO & MDC
216    nsScannerSubstring mCommentDecl; // includes MDO & MDC
217};
218
219
220/**
221 *  This class declares entity tokens, which always take
222 *  the form &xxxx;. This class also offers a few utility
223 *  methods that allow you to easily reduce entities.
224 * 
225 *  @update  gess 3/25/98
226 */
227class CEntityToken : public CHTMLToken {
228  CTOKEN_IMPL_SIZEOF
229
230  public:
231                        CEntityToken();
232                        CEntityToken(const nsAString& aString);
233    virtual const char* GetClassName(void);
234    virtual PRInt32     GetTokenType(void);
235            PRInt32     TranslateToUnicodeStr(nsString& aString);
236    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
237    static  nsresult    ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
238    static  PRInt32     TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
239
240    virtual const nsAString& GetStringValue(void);
241    virtual void        GetSource(nsString& anOutputString);
242    virtual void        AppendSourceTo(nsAString& anOutputString);
243
244  protected:
245    nsString          mTextValue;
246};
247
248
249/**
250 *  Whitespace tokens are used where whitespace can be
251 *  detected as distinct from text. This allows us to
252 *  easily skip leading/trailing whitespace when desired.
253 * 
254 *  @update  gess 3/25/98
255 */
256class CWhitespaceToken: public CHTMLToken {
257  CTOKEN_IMPL_SIZEOF
258
259  public:
260                        CWhitespaceToken();
261                        CWhitespaceToken(const nsAString& aString);
262    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
263    virtual const char* GetClassName(void);
264    virtual PRInt32     GetTokenType(void);
265    virtual const nsAString& GetStringValue(void);
266
267  protected:
268    nsString          mTextValue;
269};
270
271/**
272 *  Text tokens contain the normalized form of html text.
273 *  These tokens are guaranteed not to contain entities,
274 *  start or end tags, or newlines.
275 * 
276 *  @update  gess 3/25/98
277 */
278class CTextToken: public CHTMLToken {
279  CTOKEN_IMPL_SIZEOF
280
281  public:
282                        CTextToken();
283                        CTextToken(const nsAString& aString);
284    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
285            nsresult    ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
286                                     nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens);
287    virtual const char* GetClassName(void);
288    virtual PRInt32     GetTokenType(void);
289    virtual PRInt32     GetTextLength(void);
290    virtual void        CopyTo(nsAString& aStr);
291    virtual const nsAString& GetStringValue(void);
292    virtual void        Bind(nsScanner* aScanner, nsScannerIterator& aStart, nsScannerIterator& aEnd);
293    virtual void        Bind(const nsAString& aStr);
294
295  protected:
296    nsScannerSubstring          mTextValue;
297};
298
299
300/**
301 *  CDATASection tokens contain raw unescaped text content delimited by
302 *  a ![CDATA[ and ]].
303 *  XXX Not really a HTML construct - maybe we need a separation
304 * 
305 *  @update  vidur 11/12/98
306 */
307class CCDATASectionToken : public CHTMLToken {
308  CTOKEN_IMPL_SIZEOF
309
310public:
311                        CCDATASectionToken(eHTMLTags aTag = eHTMLTag_unknown);
312                        CCDATASectionToken(const nsAString& aString);
313    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
314    virtual const char* GetClassName(void);
315    virtual PRInt32     GetTokenType(void); 
316    virtual const nsAString& GetStringValue(void);
317
318  protected:
319    nsString          mTextValue;
320};
321
322
323/**
324 *  Declaration tokens contain raw unescaped text content (not really, but
325 *  right now we use this only for view source).
326 *  XXX Not really a HTML construct - maybe we need a separation
327 * 
328 */
329class CMarkupDeclToken : public CHTMLToken {
330  CTOKEN_IMPL_SIZEOF
331
332public:
333                        CMarkupDeclToken();
334                        CMarkupDeclToken(const nsAString& aString);
335    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
336    virtual const char* GetClassName(void);
337    virtual PRInt32     GetTokenType(void); 
338    virtual const nsAString& GetStringValue(void);
339
340protected:
341    nsScannerSubstring  mTextValue;
342};
343
344
345/**
346 *  Attribute tokens are used to contain attribute key/value
347 *  pairs whereever they may occur. Typically, they should
348 *  occur only in start tokens. However, we may expand that
349 *  ability when XML tokens become commonplace.
350 * 
351 *  @update  gess 3/25/98
352 */
353class CAttributeToken: public CHTMLToken {
354  CTOKEN_IMPL_SIZEOF
355
356  public:
357                          CAttributeToken();
358                          CAttributeToken(const nsAString& aString);
359                          CAttributeToken(const nsAString& aKey, const nsAString& aString);
360                           ~CAttributeToken() {}
361    virtual nsresult      Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
362    virtual const char*   GetClassName(void);
363    virtual PRInt32       GetTokenType(void);
364    virtual const nsAString&     GetKey(void); // XXX {return mTextKey;}
365    virtual void          SetKey(const nsAString& aKey);
366    virtual void          BindKey(nsScanner* aScanner, nsScannerIterator& aStart, nsScannerIterator& aEnd);
367    virtual const nsAString&     GetValue(void) {return mTextValue;}
368    virtual void          SanitizeKey();
369    virtual const nsAString& GetStringValue(void);
370    virtual void          GetSource(nsString& anOutputString);
371    virtual void          AppendSourceTo(nsAString& anOutputString);
372   
373    PRPackedBool       mHasEqualWithoutValue;
374  protected:
375#ifdef DEBUG
376    PRPackedBool       mLastAttribute;
377#endif
378    nsAutoString       mTextValue;
379    nsScannerSubstring mTextKey;
380};
381
382
383/**
384 *  Newline tokens contain, you guessed it, newlines.
385 *  They consume newline (CR/LF) either alone or in pairs.
386 * 
387 *  @update  gess 3/25/98
388 */
389class CNewlineToken: public CHTMLToken {
390  CTOKEN_IMPL_SIZEOF
391
392  public:
393                        CNewlineToken();
394    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
395    virtual const char* GetClassName(void);
396    virtual PRInt32     GetTokenType(void);
397    virtual const nsAString&   GetStringValue(void);
398
399    static void AllocNewline();
400    static void FreeNewline();
401};
402
403
404/**
405 *  Script tokens contain sequences of javascript (or, gulp,
406 *  any other script you care to send). We don't tokenize
407 *  it here, nor validate it. We just wrap it up, and pass
408 *  it along to the html parser, who sends it (later on)
409 *  to the scripting engine.
410 * 
411 *  @update  gess 3/25/98
412 */
413class CScriptToken: public CHTMLToken {
414  CTOKEN_IMPL_SIZEOF
415
416  public:
417                        CScriptToken();
418                        CScriptToken(const nsAString& aString);
419    virtual const char* GetClassName(void);
420    virtual PRInt32     GetTokenType(void);
421    virtual const nsAString&   GetStringValue(void);
422
423  protected:
424    nsString          mTextValue;
425};
426
427
428/**
429 *  Style tokens contain sequences of css style. We don't
430 *  tokenize it here, nor validate it. We just wrap it up,
431 *  and pass it along to the html parser, who sends it
432 *  (later on) to the style engine.
433 * 
434 *  @update  gess 3/25/98
435 */
436class CStyleToken: public CHTMLToken {
437  CTOKEN_IMPL_SIZEOF
438
439  public:
440                         CStyleToken();
441                         CStyleToken(const nsAString& aString);
442    virtual const char*  GetClassName(void);
443    virtual PRInt32      GetTokenType(void);
444    virtual const nsAString&   GetStringValue(void);
445
446  protected:
447    nsString          mTextValue;
448};
449
450
451/**
452 *  Whitespace tokens are used where whitespace can be
453 *  detected as distinct from text. This allows us to
454 *  easily skip leading/trailing whitespace when desired.
455 * 
456 *  @update  gess 3/25/98
457 */
458class CInstructionToken: public CHTMLToken {
459  CTOKEN_IMPL_SIZEOF
460
461  public:
462                        CInstructionToken();
463                        CInstructionToken(const nsAString& aString);
464    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
465    virtual const char* GetClassName(void);
466    virtual PRInt32     GetTokenType(void);
467    virtual const nsAString&   GetStringValue(void);
468
469  protected:
470    nsString          mTextValue;
471};
472
473class CErrorToken : public CHTMLToken {
474  CTOKEN_IMPL_SIZEOF
475
476public:
477  CErrorToken(nsParserError* aError=0);
478  ~CErrorToken();
479  virtual const char* GetClassName(void);
480  virtual PRInt32     GetTokenType(void);
481 
482  void SetError(nsParserError* aError);  // CErrorToken takes ownership of aError
483
484  // The nsParserError object returned by GetError is still owned by CErrorToken.
485  // DO NOT use the delete operator on it.  Should we change this so that a copy
486  // of nsParserError is returned which needs to be destroyed by the consumer?
487  const nsParserError* GetError(void);   
488
489    virtual const nsAString&   GetStringValue(void);
490protected:
491  nsString          mTextValue;
492  nsParserError* mError;
493};
494
495/**
496 * This token is generated by the HTML and Expat tokenizers
497 * when they see the doctype declaration ("<!DOCTYPE ... >")
498 *
499 */
500
501class CDoctypeDeclToken: public CHTMLToken {
502  CTOKEN_IMPL_SIZEOF
503
504public:
505                        CDoctypeDeclToken(eHTMLTags aTag=eHTMLTag_unknown);
506                        CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag=eHTMLTag_unknown);
507    virtual nsresult    Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
508    virtual const char* GetClassName(void);
509    virtual PRInt32     GetTokenType(void);
510    virtual const nsAString& GetStringValue(void);
511    virtual void SetStringValue(const nsAString& aStr);
512
513  protected:
514    nsString          mTextValue;
515};
516
517#endif
Note: See TracBrowser for help on using the repository browser.