1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
---|
2 | /* ***** BEGIN LICENSE BLOCK ***** |
---|
3 | * Version: NPL 1.1/GPL 2.0/LGPL 2.1 |
---|
4 | * |
---|
5 | * The contents of this file are subject to the Netscape Public License |
---|
6 | * Version 1.1 (the "License"); you may not use this file except in |
---|
7 | * compliance with the License. You may obtain a copy of the License at |
---|
8 | * http://www.mozilla.org/NPL/ |
---|
9 | * |
---|
10 | * Software distributed under the License is distributed on an "AS IS" basis, |
---|
11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
---|
12 | * for the specific language governing rights and limitations under the |
---|
13 | * License. |
---|
14 | * |
---|
15 | * The Original Code is mozilla.org code. |
---|
16 | * |
---|
17 | * The Initial Developer of the Original Code is |
---|
18 | * Netscape Communications Corporation. |
---|
19 | * Portions created by the Initial Developer are Copyright (C) 1998 |
---|
20 | * the Initial Developer. All Rights Reserved. |
---|
21 | * |
---|
22 | * Contributor(s): |
---|
23 | * |
---|
24 | * |
---|
25 | * Alternatively, the contents of this file may be used under the terms of |
---|
26 | * either the GNU General Public License Version 2 or later (the "GPL"), or |
---|
27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
---|
28 | * in which case the provisions of the GPL or the LGPL are applicable instead |
---|
29 | * of those above. If you wish to allow use of your version of this file only |
---|
30 | * under the terms of either the GPL or the LGPL, and not to allow others to |
---|
31 | * use your version of this file under the terms of the NPL, indicate your |
---|
32 | * decision by deleting the provisions above and replace them with the notice |
---|
33 | * and other provisions required by the GPL or the LGPL. If you do not delete |
---|
34 | * the provisions above, a recipient may use your version of this file under |
---|
35 | * the terms of any one of the NPL, the GPL or the LGPL. |
---|
36 | * |
---|
37 | * ***** END LICENSE BLOCK ***** */ |
---|
38 | |
---|
39 | |
---|
40 | /** |
---|
41 | * MODULE NOTES: |
---|
42 | * @update gess 4/1/98 |
---|
43 | * |
---|
44 | */ |
---|
45 | |
---|
46 | #include "nsIAtom.h" |
---|
47 | #include "nsHTMLTokenizer.h" |
---|
48 | #include "nsScanner.h" |
---|
49 | #include "nsElementTable.h" |
---|
50 | #include "CParserContext.h" |
---|
51 | #include "nsReadableUtils.h" |
---|
52 | #include "nsUnicharUtils.h" |
---|
53 | |
---|
54 | /************************************************************************ |
---|
55 | And now for the main class -- nsHTMLTokenizer... |
---|
56 | ************************************************************************/ |
---|
57 | |
---|
// Interface IDs used by this file: nsHTMLTokenizer::QueryInterface compares
// the requested IID against all three, and NS_NewHTMLTokenizer queries the
// freshly created object for kClassIID.
58 | static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); |
---|
59 | static NS_DEFINE_IID(kITokenizerIID, NS_ITOKENIZER_IID); |
---|
60 | static NS_DEFINE_IID(kClassIID, NS_HTMLTOKENIZER_IID); |
---|
61 | |
---|
62 | /** |
---|
63 | * This method gets called as part of our COM-like interfaces. |
---|
64 | * Its purpose is to create an interface to parser object |
---|
65 | * of some type. |
---|
66 | * |
---|
67 | * @update gess 4/8/98 |
---|
68 | * @param nsIID id of object to discover |
---|
69 | * @param aInstancePtr ptr to newly discovered interface |
---|
70 | * @return NS_xxx result code |
---|
71 | */ |
---|
72 | nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) |
---|
73 | { |
---|
74 | if (NULL == aInstancePtr) { |
---|
75 | return NS_ERROR_NULL_POINTER; |
---|
76 | } |
---|
77 | |
---|
78 | if(aIID.Equals(kISupportsIID)) { //do IUnknown... |
---|
79 | *aInstancePtr = (nsISupports*)(this); |
---|
80 | } |
---|
81 | else if(aIID.Equals(kITokenizerIID)) { //do IParser base class... |
---|
82 | *aInstancePtr = (nsITokenizer*)(this); |
---|
83 | } |
---|
84 | else if(aIID.Equals(kClassIID)) { //do this class... |
---|
85 | *aInstancePtr = (nsHTMLTokenizer*)(this); |
---|
86 | } |
---|
87 | else { |
---|
88 | *aInstancePtr=0; |
---|
89 | return NS_NOINTERFACE; |
---|
90 | } |
---|
91 | NS_ADDREF_THIS(); |
---|
92 | return NS_OK; |
---|
93 | } |
---|
94 | |
---|
95 | /** |
---|
96 | * This method is defined in nsHTMLTokenizer.h. It is used to |
---|
97 | * cause the COM-like construction of an HTMLTokenizer. |
---|
98 | * |
---|
99 | * @update gess 4/8/98 |
---|
100 | * @param nsIParser** ptr to newly instantiated parser |
---|
101 | * @return NS_xxx error result |
---|
102 | */ |
---|
103 | |
---|
104 | nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult, |
---|
105 | PRInt32 aFlag, |
---|
106 | eParserDocType aDocType, |
---|
107 | eParserCommands aCommand) |
---|
108 | { |
---|
109 | NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr"); |
---|
110 | if (nsnull == aInstancePtrResult) { |
---|
111 | return NS_ERROR_NULL_POINTER; |
---|
112 | } |
---|
113 | nsHTMLTokenizer* it = new nsHTMLTokenizer(aFlag,aDocType,aCommand); |
---|
114 | if (nsnull == it) { |
---|
115 | return NS_ERROR_OUT_OF_MEMORY; |
---|
116 | } |
---|
117 | return it->QueryInterface(kClassIID, (void **) aInstancePtrResult); |
---|
118 | } |
---|
119 | |
---|
120 | |
---|
// Macro-provided reference counting for nsHTMLTokenizer (presumably these
// expand to the AddRef/Release definitions -- confirm against the XPCOM
// headers). QueryInterface in this file relies on it via NS_ADDREF_THIS().
121 | NS_IMPL_ADDREF(nsHTMLTokenizer) |
---|
122 | NS_IMPL_RELEASE(nsHTMLTokenizer) |
---|
123 | |
---|
124 | |
---|
125 | /** |
---|
126 | * Default constructor |
---|
127 | * |
---|
128 | * @update gess 4/9/98 |
---|
129 | * @param |
---|
130 | * @return |
---|
131 | */ |
---|
132 | nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode, |
---|
133 | eParserDocType aDocType, |
---|
134 | eParserCommands aCommand) : |
---|
135 | nsITokenizer(), mTokenDeque(0) |
---|
136 | { |
---|
137 | if (aParseMode==eDTDMode_full_standards || |
---|
138 | aParseMode==eDTDMode_almost_standards) { |
---|
139 | mFlags = NS_IPARSER_FLAG_STRICT_MODE; |
---|
140 | } |
---|
141 | else if (aParseMode==eDTDMode_quirks) { |
---|
142 | mFlags = NS_IPARSER_FLAG_QUIRKS_MODE; |
---|
143 | } |
---|
144 | else if (aParseMode==eDTDMode_autodetect) { |
---|
145 | mFlags = NS_IPARSER_FLAG_AUTO_DETECT_MODE; |
---|
146 | } |
---|
147 | else { |
---|
148 | mFlags = NS_IPARSER_FLAG_UNKNOWN_MODE; |
---|
149 | } |
---|
150 | |
---|
151 | if (aDocType==ePlainText) { |
---|
152 | mFlags |= NS_IPARSER_FLAG_PLAIN_TEXT; |
---|
153 | } |
---|
154 | else if (aDocType==eXML) { |
---|
155 | mFlags |= NS_IPARSER_FLAG_XML; |
---|
156 | } |
---|
157 | else if (aDocType==eHTML_Quirks || |
---|
158 | aDocType==eHTML3_Quirks || |
---|
159 | aDocType==eHTML_Strict) { |
---|
160 | mFlags |= NS_IPARSER_FLAG_HTML; |
---|
161 | } |
---|
162 | |
---|
163 | mFlags |= (aCommand==eViewSource)? NS_IPARSER_FLAG_VIEW_SOURCE:NS_IPARSER_FLAG_VIEW_NORMAL; |
---|
164 | |
---|
165 | mTokenAllocator = nsnull; |
---|
166 | mTokenScanPos = 0; |
---|
167 | mPreserveTarget = eHTMLTag_unknown; |
---|
168 | } |
---|
169 | |
---|
170 | |
---|
171 | /** |
---|
172 | * Destructor |
---|
173 | * |
---|
174 | * @update gess 4/9/98 |
---|
175 | * @param |
---|
176 | * @return |
---|
177 | */ |
---|
178 | nsHTMLTokenizer::~nsHTMLTokenizer(){ |
---|
179 | if(mTokenDeque.GetSize()){ |
---|
180 | CTokenDeallocator theDeallocator(mTokenAllocator->GetArenaPool()); |
---|
181 | mTokenDeque.ForEach(theDeallocator); |
---|
182 | } |
---|
183 | } |
---|
184 | |
---|
185 | |
---|
186 | /******************************************************************* |
---|
187 | Here begins the real working methods for the tokenizer. |
---|
188 | *******************************************************************/ |
---|
189 | |
---|
190 | void nsHTMLTokenizer::AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,nsTokenAllocator* aTokenAllocator) { |
---|
191 | if(aToken && aDeque) { |
---|
192 | if(NS_SUCCEEDED(aResult)) { |
---|
193 | aDeque->Push(aToken); |
---|
194 | } |
---|
195 | else { |
---|
196 | IF_FREE(aToken, aTokenAllocator); |
---|
197 | } |
---|
198 | } |
---|
199 | } |
---|
200 | |
---|
201 | /** |
---|
202 | * Retrieve a ptr to the global token recycler... |
---|
203 | * @update gess8/4/98 |
---|
204 | * @return ptr to recycler (or null) |
---|
205 | */ |
---|
206 | nsTokenAllocator* nsHTMLTokenizer::GetTokenAllocator(void) { |
---|
207 | return mTokenAllocator; |
---|
208 | } |
---|
209 | |
---|
210 | |
---|
211 | /** |
---|
212 | * This method provides access to the topmost token in the tokenDeque. |
---|
213 | * The token is not really removed from the list. |
---|
214 | * @update gess8/2/98 |
---|
215 | * @return ptr to token |
---|
216 | */ |
---|
217 | CToken* nsHTMLTokenizer::PeekToken() { |
---|
218 | return (CToken*)mTokenDeque.PeekFront(); |
---|
219 | } |
---|
220 | |
---|
221 | |
---|
222 | /** |
---|
223 | * This method provides access to the topmost token in the tokenDeque. |
---|
224 | * The token is really removed from the list; if the list is empty we return 0. |
---|
225 | * @update gess8/2/98 |
---|
226 | * @return ptr to token or NULL |
---|
227 | */ |
---|
228 | CToken* nsHTMLTokenizer::PopToken() { |
---|
229 | CToken* result=nsnull; |
---|
230 | result=(CToken*)mTokenDeque.PopFront(); |
---|
231 | return result; |
---|
232 | } |
---|
233 | |
---|
234 | |
---|
235 | /** |
---|
236 | * |
---|
237 | * @update gess8/2/98 |
---|
238 | * @param |
---|
239 | * @return |
---|
240 | */ |
---|
241 | CToken* nsHTMLTokenizer::PushTokenFront(CToken* theToken) { |
---|
242 | mTokenDeque.PushFront(theToken); |
---|
243 | return theToken; |
---|
244 | } |
---|
245 | |
---|
246 | /** |
---|
247 | * |
---|
248 | * @update gess8/2/98 |
---|
249 | * @param |
---|
250 | * @return |
---|
251 | */ |
---|
252 | CToken* nsHTMLTokenizer::PushToken(CToken* theToken) { |
---|
253 | mTokenDeque.Push(theToken); |
---|
254 | return theToken; |
---|
255 | } |
---|
256 | |
---|
257 | /** |
---|
258 | * |
---|
259 | * @update gess12/29/98 |
---|
260 | * @param |
---|
261 | * @return |
---|
262 | */ |
---|
263 | PRInt32 nsHTMLTokenizer::GetCount(void) { |
---|
264 | return mTokenDeque.GetSize(); |
---|
265 | } |
---|
266 | |
---|
267 | /** |
---|
268 | * |
---|
269 | * @update gess12/29/98 |
---|
270 | * @param |
---|
271 | * @return |
---|
272 | */ |
---|
273 | CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){ |
---|
274 | return (CToken*)mTokenDeque.ObjectAt(anIndex); |
---|
275 | } |
---|
276 | |
---|
277 | /** |
---|
278 | * @update gess 12/29/98 |
---|
279 | * @update harishd 08/04/00 |
---|
280 | * @param |
---|
281 | * @return |
---|
282 | */ |
---|
283 | nsresult nsHTMLTokenizer::WillTokenize(PRBool aIsFinalChunk,nsTokenAllocator* aTokenAllocator) |
---|
284 | { |
---|
285 | mTokenAllocator=aTokenAllocator; |
---|
286 | mIsFinalChunk=aIsFinalChunk; |
---|
287 | mTokenScanPos=mTokenDeque.GetSize(); //cause scanDocStructure to search from here for new tokens... |
---|
288 | return NS_OK; |
---|
289 | } |
---|
290 | |
---|
291 | /** |
---|
292 | * |
---|
293 | * @update gess12/29/98 |
---|
294 | * @param |
---|
295 | * @return |
---|
296 | */ |
---|
297 | void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){ |
---|
298 | |
---|
299 | PRInt32 aCount=aDeque.GetSize(); |
---|
300 | |
---|
301 | //last but not least, let's check the misplaced content list. |
---|
302 | //if we find it, then we have to push it all into the body before continuing... |
---|
303 | PRInt32 anIndex=0; |
---|
304 | for(anIndex=0;anIndex<aCount;++anIndex){ |
---|
305 | CToken* theToken=(CToken*)aDeque.Pop(); |
---|
306 | PushTokenFront(theToken); |
---|
307 | } |
---|
308 | |
---|
309 | } |
---|
310 | |
---|
311 | NS_IMETHODIMP |
---|
312 | nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer) |
---|
313 | { |
---|
314 | if (aTokenizer) { |
---|
315 | mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT; |
---|
316 | mPreserveTarget = |
---|
317 | NS_STATIC_CAST(nsHTMLTokenizer*, aTokenizer)->mPreserveTarget; |
---|
318 | if (mPreserveTarget != eHTMLTag_unknown) |
---|
319 | mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT; |
---|
320 | } |
---|
321 | return NS_OK; |
---|
322 | } |
---|
323 | |
---|
324 | /** |
---|
325 | * This is a utilty method for ScanDocStructure, which finds a given |
---|
326 | * tag in the stack. |
---|
327 | * |
---|
328 | * @update gess 08/30/00 |
---|
329 | * @param aTag -- the ID of the tag we're seeking |
---|
330 | * @param aTagStack -- the stack to be searched |
---|
331 | * @return index pos of tag in stack if found, otherwise kNotFound |
---|
332 | */ |
---|
333 | static PRInt32 FindLastIndexOfTag(eHTMLTags aTag,nsDeque &aTagStack) { |
---|
334 | PRInt32 theCount=aTagStack.GetSize(); |
---|
335 | |
---|
336 | while(0<theCount) { |
---|
337 | CHTMLToken *theToken=(CHTMLToken*)aTagStack.ObjectAt(--theCount); |
---|
338 | if(theToken) { |
---|
339 | eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); |
---|
340 | if(theTag==aTag) { |
---|
341 | return theCount; |
---|
342 | } |
---|
343 | } |
---|
344 | } |
---|
345 | |
---|
346 | return kNotFound; |
---|
347 | } |
---|
348 | |
---|
349 | /** |
---|
350 | * This method scans the sequence of tokens to determine the |
---|
351 | * well formedness of each tag structure. This is used to |
---|
352 | * disable residual-style handling in well formed cases. |
---|
353 | * |
---|
354 | * @update gess 1Sep2000 |
---|
355 | * @param |
---|
356 | * @return |
---|
357 | */ |
---|
358 | nsresult nsHTMLTokenizer::ScanDocStructure(PRBool aFinalChunk) { |
---|
359 | nsresult result=NS_OK; |
---|
360 | if (!mTokenDeque.GetSize()) |
---|
361 | return result; |
---|
362 | |
---|
363 | CHTMLToken *theRootToken=0; |
---|
364 | |
---|
365 | //*** start by finding the first start tag that hasn't been reviewed. |
---|
366 | |
---|
367 | while(mTokenScanPos>0) { |
---|
368 | theRootToken=(CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos); |
---|
369 | if(theRootToken) { |
---|
370 | eHTMLTokenTypes theType=eHTMLTokenTypes(theRootToken->GetTokenType()); |
---|
371 | if(eToken_start==theType) { |
---|
372 | if(eFormUnknown==theRootToken->GetContainerInfo()) { |
---|
373 | break; |
---|
374 | } |
---|
375 | } |
---|
376 | } |
---|
377 | mTokenScanPos--; |
---|
378 | } |
---|
379 | |
---|
380 | /*---------------------------------------------------------------------- |
---|
381 | * Now that we know where to start, let's walk through the |
---|
382 | * tokens to see which are well-formed. Stop when you run out |
---|
383 | * of fresh tokens. |
---|
384 | *---------------------------------------------------------------------*/ |
---|
385 | |
---|
386 | theRootToken=(CHTMLToken*)mTokenDeque.ObjectAt(mTokenScanPos); //init to root |
---|
387 | |
---|
388 | nsDeque theStack(0); |
---|
389 | eHTMLTags theRootTag=eHTMLTag_unknown; |
---|
390 | CHTMLToken *theToken=theRootToken; //init to root |
---|
391 | PRInt32 theStackDepth=0; |
---|
392 | |
---|
393 | static const PRInt32 theMaxStackDepth=200; //dont bother if we get ridiculously deep. |
---|
394 | |
---|
395 | while(theToken && (theStackDepth<theMaxStackDepth)) { |
---|
396 | |
---|
397 | eHTMLTokenTypes theType=eHTMLTokenTypes(theToken->GetTokenType()); |
---|
398 | eHTMLTags theTag=(eHTMLTags)theToken->GetTypeID(); |
---|
399 | |
---|
400 | PRBool theTagIsContainer=nsHTMLElement::IsContainer(theTag); //bug54117... |
---|
401 | |
---|
402 | if(theTagIsContainer) { |
---|
403 | PRBool theTagIsBlock=gHTMLElements[theTag].IsMemberOf(kBlockEntity); |
---|
404 | PRBool theTagIsInline= (theTagIsBlock) ? PR_FALSE : gHTMLElements[theTag].IsMemberOf(kInlineEntity); |
---|
405 | |
---|
406 | if(theTagIsBlock || theTagIsInline || (eHTMLTag_table==theTag)) { |
---|
407 | |
---|
408 | switch(theType) { |
---|
409 | |
---|
410 | case eToken_start: |
---|
411 | if(0==theStack.GetSize()) { |
---|
412 | //track the tag on the top of the stack... |
---|
413 | theRootToken=theToken; |
---|
414 | theRootTag=theTag; |
---|
415 | } |
---|
416 | theStack.Push(theToken); |
---|
417 | ++theStackDepth; |
---|
418 | break; |
---|
419 | |
---|
420 | case eToken_end: |
---|
421 | { |
---|
422 | CHTMLToken *theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek()); |
---|
423 | if(theLastToken) { |
---|
424 | if(theTag==theLastToken->GetTypeID()) { |
---|
425 | theStack.Pop(); //yank it for real |
---|
426 | theStackDepth--; |
---|
427 | theLastToken->SetContainerInfo(eWellFormed); |
---|
428 | |
---|
429 | //in addition, let's look above this container to see if we can find |
---|
430 | //any tags that are already marked malformed. If so, pop them too! |
---|
431 | |
---|
432 | theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek()); |
---|
433 | while(theLastToken) { |
---|
434 | if(eMalformed==theRootToken->GetContainerInfo()) { |
---|
435 | theStack.Pop(); //yank the malformed token for real. |
---|
436 | theLastToken= NS_STATIC_CAST(CHTMLToken*, theStack.Peek()); |
---|
437 | continue; |
---|
438 | } |
---|
439 | break; |
---|
440 | } |
---|
441 | } |
---|
442 | else { |
---|
443 | //the topmost token isn't what we expected, so that container must |
---|
444 | //be malformed. If the tag is a block, we don't really care (but we'll |
---|
445 | //mark it anyway). If it's an inline we DO care, especially if the |
---|
446 | //inline tried to contain a block (that's when RS handling kicks in). |
---|
447 | if(theTagIsInline) { |
---|
448 | PRInt32 theIndex=FindLastIndexOfTag(theTag,theStack); |
---|
449 | if(kNotFound!=theIndex) { |
---|
450 | theToken=(CHTMLToken*)theStack.ObjectAt(theIndex); |
---|
451 | theToken->SetContainerInfo(eMalformed); |
---|
452 | } |
---|
453 | //otherwise we ignore an out-of-place end tag. |
---|
454 | } |
---|
455 | else { |
---|
456 | } |
---|
457 | } |
---|
458 | } |
---|
459 | } |
---|
460 | break; |
---|
461 | |
---|
462 | default: |
---|
463 | break; |
---|
464 | } //switch |
---|
465 | |
---|
466 | } |
---|
467 | } |
---|
468 | |
---|
469 | theToken=(CHTMLToken*)mTokenDeque.ObjectAt(++mTokenScanPos); |
---|
470 | } |
---|
471 | |
---|
472 | return result; |
---|
473 | } |
---|
474 | |
---|
475 | nsresult nsHTMLTokenizer::DidTokenize(PRBool aFinalChunk) { |
---|
476 | return ScanDocStructure(aFinalChunk); |
---|
477 | } |
---|
478 | |
---|
479 | /** |
---|
480 | * This method repeatedly called by the tokenizer. |
---|
481 | * Each time, we determine the kind of token were about to |
---|
482 | * read, and then we call the appropriate method to handle |
---|
483 | * that token type. |
---|
484 | * |
---|
485 | * @update gess 3/25/98 |
---|
486 | * @param aChar: last char read |
---|
487 | * @param aScanner: see nsScanner.h |
---|
488 | * @param anErrorCode: arg that will hold error condition |
---|
489 | * @return new token or null |
---|
490 | */ |
---|
491 | nsresult nsHTMLTokenizer::ConsumeToken(nsScanner& aScanner,PRBool& aFlushTokens) { |
---|
492 | |
---|
493 | PRUnichar theChar; |
---|
494 | CToken* theToken=0; |
---|
495 | |
---|
496 | nsresult result=aScanner.Peek(theChar); |
---|
497 | |
---|
498 | switch(result) { |
---|
499 | case kEOF: |
---|
500 | //We convert from eof to complete here, because we never really tried to get data. |
---|
501 | //All we did was try to see if data was available, which it wasn't. |
---|
502 | //It's important to return process complete, so that controlling logic can know that |
---|
503 | //everything went well, but we're done with token processing. |
---|
504 | return result; |
---|
505 | |
---|
506 | case NS_OK: |
---|
507 | default: |
---|
508 | |
---|
509 | if(!(mFlags & NS_IPARSER_FLAG_PLAIN_TEXT)) { |
---|
510 | if(kLessThan==theChar) { |
---|
511 | return ConsumeTag(theChar,theToken,aScanner,aFlushTokens); |
---|
512 | } |
---|
513 | else if(kAmpersand==theChar){ |
---|
514 | return ConsumeEntity(theChar,theToken,aScanner); |
---|
515 | } |
---|
516 | } |
---|
517 | |
---|
518 | if((kCR==theChar) || (kLF==theChar)) { |
---|
519 | return ConsumeNewline(theChar,theToken,aScanner); |
---|
520 | } |
---|
521 | else { |
---|
522 | if(!nsCRT::IsAsciiSpace(theChar)) { |
---|
523 | if(theChar!=nsnull) { |
---|
524 | result=ConsumeText(theToken,aScanner); |
---|
525 | } |
---|
526 | else { |
---|
527 | aScanner.GetChar(theChar); // skip the embedded null char. Fix bug 64098. |
---|
528 | } |
---|
529 | break; |
---|
530 | } |
---|
531 | result=ConsumeWhitespace(theChar,theToken,aScanner); |
---|
532 | } |
---|
533 | break; |
---|
534 | } //switch |
---|
535 | |
---|
536 | return result; |
---|
537 | } |
---|
538 | |
---|
539 | |
---|
540 | /** |
---|
541 | * This method is called just after a "<" has been consumed |
---|
542 | * and we know we're at the start of some kind of tagged |
---|
543 | * element. We don't know yet if it's a tag or a comment. |
---|
544 | * |
---|
545 | * @update gess 5/12/98 |
---|
546 | * @param aChar is the last char read |
---|
547 | * @param aScanner is represents our input source |
---|
548 | * @param aToken is the out arg holding our new token |
---|
549 | * @return error code. |
---|
550 | */ |
---|
551 | nsresult nsHTMLTokenizer::ConsumeTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) { |
---|
552 | |
---|
553 | PRUnichar theNextChar, oldChar; |
---|
554 | nsresult result=aScanner.Peek(aChar,1); |
---|
555 | |
---|
556 | if(NS_OK==result) { |
---|
557 | |
---|
558 | switch(aChar) { |
---|
559 | case kForwardSlash: |
---|
560 | // Get the original "<" (we've already seen it with a Peek) |
---|
561 | aScanner.GetChar(oldChar); |
---|
562 | |
---|
563 | result=aScanner.Peek(theNextChar, 1); |
---|
564 | if(NS_OK==result) { |
---|
565 | // xml allow non ASCII tag name, consume as end tag. need to make xml view source work |
---|
566 | PRBool isXML=(mFlags & NS_IPARSER_FLAG_XML); |
---|
567 | if(nsCRT::IsAsciiAlpha(theNextChar)||(kGreaterThan==theNextChar)|| |
---|
568 | (isXML && (! nsCRT::IsAscii(theNextChar)))) { |
---|
569 | result=ConsumeEndTag(aChar,aToken,aScanner); |
---|
570 | } |
---|
571 | else result=ConsumeComment(aChar,aToken,aScanner); |
---|
572 | }//if |
---|
573 | break; |
---|
574 | |
---|
575 | case kExclamation: |
---|
576 | // Get the original "<" (we've already seen it with a Peek) |
---|
577 | aScanner.GetChar(oldChar); |
---|
578 | |
---|
579 | result=aScanner.Peek(theNextChar, 1); |
---|
580 | if(NS_OK==result) { |
---|
581 | if((kMinus==theNextChar) || (kGreaterThan==theNextChar)) { |
---|
582 | result=ConsumeComment(aChar,aToken,aScanner); |
---|
583 | } |
---|
584 | else |
---|
585 | result=ConsumeSpecialMarkup(aChar,aToken,aScanner); |
---|
586 | } |
---|
587 | break; |
---|
588 | |
---|
589 | case kQuestionMark: //it must be an XML processing instruction... |
---|
590 | // Get the original "<" (we've already seen it with a Peek) |
---|
591 | aScanner.GetChar(oldChar); |
---|
592 | result=ConsumeProcessingInstruction(aChar,aToken,aScanner); |
---|
593 | break; |
---|
594 | |
---|
595 | default: |
---|
596 | if(nsCRT::IsAsciiAlpha(aChar)) { |
---|
597 | // Get the original "<" (we've already seen it with a Peek) |
---|
598 | aScanner.GetChar(oldChar); |
---|
599 | result=ConsumeStartTag(aChar,aToken,aScanner,aFlushTokens); |
---|
600 | } |
---|
601 | else { |
---|
602 | // We are not dealing with a tag. So, don't consume the original |
---|
603 | // char and leave the decision to ConsumeText(). |
---|
604 | result=ConsumeText(aToken,aScanner); |
---|
605 | } |
---|
606 | } //switch |
---|
607 | |
---|
608 | } //if |
---|
609 | return result; |
---|
610 | } |
---|
611 | |
---|
612 | /** |
---|
613 | * This method is called just after we've consumed a start |
---|
614 | * tag, and we now have to consume its attributes. |
---|
615 | * |
---|
616 | * @update rickg 03.23.2000 |
---|
617 | * @param aChar: last char read |
---|
618 | * @param aScanner: see nsScanner.h |
---|
619 | * @param aLeadingWS: contains ws chars that preceeded the first attribute |
---|
620 | * @return |
---|
621 | */ |
---|
622 | nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar, |
---|
623 | CToken* aToken, |
---|
624 | nsScanner& aScanner) { |
---|
625 | PRBool done=PR_FALSE; |
---|
626 | nsresult result=NS_OK; |
---|
627 | PRInt16 theAttrCount=0; |
---|
628 | |
---|
629 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
630 | |
---|
631 | while((!done) && (result==NS_OK)) { |
---|
632 | CAttributeToken* theToken= NS_STATIC_CAST(CAttributeToken*, theAllocator->CreateTokenOfType(eToken_attribute,eHTMLTag_unknown)); |
---|
633 | if(theToken){ |
---|
634 | result=theToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... |
---|
635 | |
---|
636 | //Much as I hate to do this, here's some special case code. |
---|
637 | //This handles the case of empty-tags in XML. Our last |
---|
638 | //attribute token will come through with a text value of "" |
---|
639 | //and a textkey of "/". We should destroy it, and tell the |
---|
640 | //start token it was empty. |
---|
641 | if(NS_SUCCEEDED(result)) { |
---|
642 | PRBool isUsableAttr = PR_TRUE; |
---|
643 | const nsAString& key=theToken->GetKey(); |
---|
644 | const nsAString& text=theToken->GetValue(); |
---|
645 | |
---|
646 | // support XML like syntax to fix bugs like 44186 |
---|
647 | if(!key.IsEmpty() && kForwardSlash==key.First() && text.IsEmpty()) { |
---|
648 | isUsableAttr = PRBool(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE); // Fix bug 103095 |
---|
649 | aToken->SetEmpty(isUsableAttr); |
---|
650 | } |
---|
651 | if(isUsableAttr) { |
---|
652 | ++theAttrCount; |
---|
653 | AddToken((CToken*&)theToken,result,&mTokenDeque,theAllocator); |
---|
654 | } |
---|
655 | else { |
---|
656 | IF_FREE(theToken, mTokenAllocator); |
---|
657 | } |
---|
658 | } |
---|
659 | else { //if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result){ |
---|
660 | aToken->SetEmpty(PR_TRUE); |
---|
661 | IF_FREE(theToken, mTokenAllocator); |
---|
662 | if(NS_ERROR_HTMLPARSER_BADATTRIBUTE==result) |
---|
663 | result=NS_OK; |
---|
664 | } |
---|
665 | }//if |
---|
666 | |
---|
667 | #ifdef DEBUG |
---|
668 | if(NS_SUCCEEDED(result)){ |
---|
669 | PRInt32 newline = 0; |
---|
670 | result = aScanner.SkipWhitespace(newline); |
---|
671 | NS_ASSERTION(newline == 0, "CAttribute::Consume() failed to collect all the newlines!"); |
---|
672 | } |
---|
673 | #endif |
---|
674 | if (NS_SUCCEEDED(result)) { |
---|
675 | result = aScanner.Peek(aChar); |
---|
676 | if (NS_SUCCEEDED(result)) { |
---|
677 | if (aChar == kGreaterThan) { //you just ate the '>' |
---|
678 | aScanner.GetChar(aChar); //skip the '>' |
---|
679 | done = PR_TRUE; |
---|
680 | } |
---|
681 | else if(aChar == kLessThan) { |
---|
682 | done = PR_TRUE; |
---|
683 | } |
---|
684 | }//if |
---|
685 | }//if |
---|
686 | }//while |
---|
687 | |
---|
688 | aToken->SetAttributeCount(theAttrCount); |
---|
689 | return result; |
---|
690 | } |
---|
691 | |
---|
692 | /** |
---|
693 | * |
---|
694 | * @update gess12/28/98 |
---|
695 | * @param |
---|
696 | * @return |
---|
697 | */ |
---|
698 | nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner,PRBool& aFlushTokens) { |
---|
699 | PRInt32 theDequeSize=mTokenDeque.GetSize(); //remember this for later in case you have to unwind... |
---|
700 | nsresult result=NS_OK; |
---|
701 | |
---|
702 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
703 | aToken=theAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown); |
---|
704 | |
---|
705 | if(aToken) { |
---|
706 | // Save the position after '<' for use in recording traling contents. Ref: Bug. 15204. |
---|
707 | nsScannerIterator origin; |
---|
708 | aScanner.CurrentPosition(origin); |
---|
709 | |
---|
710 | result= aToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... |
---|
711 | |
---|
712 | if(NS_SUCCEEDED(result)) { |
---|
713 | |
---|
714 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
715 | NS_ENSURE_SUCCESS(result, result); |
---|
716 | |
---|
717 | eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID(); |
---|
718 | |
---|
719 | //Good. Now, let's see if the next char is ">". |
---|
720 | //If so, we have a complete tag, otherwise, we have attributes. |
---|
721 | result = aScanner.Peek(aChar); |
---|
722 | NS_ENSURE_SUCCESS(result, result); |
---|
723 | |
---|
724 | if(kGreaterThan != aChar) { //look for '>' |
---|
725 | result = ConsumeAttributes(aChar, aToken, aScanner); |
---|
726 | } //if |
---|
727 | else { |
---|
728 | aScanner.GetChar(aChar); |
---|
729 | } |
---|
730 | |
---|
731 | /* Now that that's over with, we have one more problem to solve. |
---|
732 | In the case that we just read a <SCRIPT> or <STYLE> tags, we should go and |
---|
733 | consume all the content itself. |
---|
734 | */ |
---|
735 | if(NS_SUCCEEDED(result)) { |
---|
736 | CStartToken* theStartToken = NS_STATIC_CAST(CStartToken*,aToken); |
---|
737 | //XXX - Find a better soution to record content |
---|
738 | //Added _plaintext to fix bug 46054. |
---|
739 | if(!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && |
---|
740 | (theTag == eHTMLTag_textarea || |
---|
741 | theTag == eHTMLTag_xmp || |
---|
742 | theTag == eHTMLTag_plaintext || |
---|
743 | theTag == eHTMLTag_noscript || |
---|
744 | theTag == eHTMLTag_noframes)) { |
---|
745 | NS_ASSERTION(mPreserveTarget == eHTMLTag_unknown, |
---|
746 | "mPreserveTarget set but not preserving content?"); |
---|
747 | mPreserveTarget = theTag; |
---|
748 | mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT; |
---|
749 | } |
---|
750 | |
---|
751 | if (mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) |
---|
752 | PreserveToken(theStartToken, aScanner, origin); |
---|
753 | |
---|
754 | //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) { |
---|
755 | if(gHTMLElements[theTag].CanContainType(kCDATA)) { |
---|
756 | nsAutoString endTagName; |
---|
757 | endTagName.Assign(nsHTMLTags::GetStringValue(theTag)); |
---|
758 | |
---|
759 | CToken* text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); |
---|
760 | CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text); |
---|
761 | result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish consuming text... |
---|
762 | |
---|
763 | // Fix bug 44186 |
---|
764 | // Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script> |
---|
765 | // Note: if aFlushTokens is TRUE then we have seen an </script> |
---|
766 | // We do NOT want to output the end token if we didn't see a |
---|
767 | // </script> and have a preserve target. If that happens, then we'd |
---|
768 | // be messing up the text inside the <textarea> or <xmp> or whatever |
---|
769 | // it is. |
---|
770 | if((!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && |
---|
771 | !theStartToken->IsEmpty()) || aFlushTokens) { |
---|
772 | theStartToken->SetEmpty(PR_FALSE); // Setting this would make cases like <script/>d.w("text");</script> work. |
---|
773 | CToken* endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName); |
---|
774 | AddToken(text,result,&mTokenDeque,theAllocator); |
---|
775 | AddToken(endToken,result,&mTokenDeque,theAllocator); |
---|
776 | } |
---|
777 | else { |
---|
778 | IF_FREE(text, mTokenAllocator); |
---|
779 | } |
---|
780 | } |
---|
781 | } |
---|
782 | |
---|
783 | //EEEEECCCCKKKK!!! |
---|
784 | //This code is confusing, so pay attention. |
---|
785 | //If you're here, it's because we were in the midst of consuming a start |
---|
786 | //tag but ran out of data (not in the stream, but in this *part* of the stream. |
---|
787 | //For simplicity, we have to unwind our input. Therefore, we pop and discard |
---|
788 | //any new tokens we've cued this round. Later we can get smarter about this. |
---|
789 | if(NS_FAILED(result)) { |
---|
790 | while(mTokenDeque.GetSize()>theDequeSize) { |
---|
791 | CToken* theToken=(CToken*)mTokenDeque.Pop(); |
---|
792 | IF_FREE(theToken, mTokenAllocator); |
---|
793 | } |
---|
794 | } |
---|
795 | } //if |
---|
796 | else IF_FREE(aToken, mTokenAllocator); |
---|
797 | } //if |
---|
798 | return result; |
---|
799 | } |
---|
800 | |
---|
801 | /** |
---|
802 | * |
---|
803 | * @update gess12/28/98 |
---|
804 | * @param |
---|
805 | * @return |
---|
806 | */ |
---|
807 | nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { |
---|
808 | |
---|
809 | // Get the "/" (we've already seen it with a Peek) |
---|
810 | aScanner.GetChar(aChar); |
---|
811 | |
---|
812 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
813 | aToken=theAllocator->CreateTokenOfType(eToken_end,eHTMLTag_unknown); |
---|
814 | nsresult result=NS_OK; |
---|
815 | |
---|
816 | if(aToken) { |
---|
817 | result= aToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... |
---|
818 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
819 | NS_ENSURE_SUCCESS(result, result); |
---|
820 | |
---|
821 | result = aScanner.Peek(aChar); |
---|
822 | NS_ENSURE_SUCCESS(result, result); |
---|
823 | |
---|
824 | if(kGreaterThan != aChar) { |
---|
825 | result = ConsumeAttributes(aChar, aToken, aScanner); |
---|
826 | NS_ENSURE_SUCCESS(result, result); |
---|
827 | } |
---|
828 | else { |
---|
829 | aScanner.GetChar(aChar); |
---|
830 | } |
---|
831 | |
---|
832 | if (NS_SUCCEEDED(result)) { |
---|
833 | eHTMLTags theTag = (eHTMLTags)aToken->GetTypeID(); |
---|
834 | if (mPreserveTarget == theTag) { |
---|
835 | // Target reached. Stop preserving content. |
---|
836 | mPreserveTarget = eHTMLTag_unknown; |
---|
837 | mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT; |
---|
838 | } |
---|
839 | } |
---|
840 | } //if |
---|
841 | return result; |
---|
842 | } |
---|
843 | |
---|
844 | /** |
---|
845 | * This method is called just after a "&" has been consumed |
---|
846 | * and we know we're at the start of an entity. |
---|
847 | * |
---|
848 | * @update gess 3/25/98 |
---|
849 | * @param aChar: last char read |
---|
850 | * @param aScanner: see nsScanner.h |
---|
851 | * @param anErrorCode: arg that will hold error condition |
---|
852 | * @return new token or null |
---|
853 | */ |
---|
854 | nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { |
---|
855 | PRUnichar theChar; |
---|
856 | nsresult result=aScanner.Peek(theChar, 1); |
---|
857 | |
---|
858 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
859 | if (NS_SUCCEEDED(result)) { |
---|
860 | if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) { |
---|
861 | aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); |
---|
862 | result=aToken->Consume(theChar,aScanner,mFlags); |
---|
863 | |
---|
864 | if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { |
---|
865 | IF_FREE(aToken, mTokenAllocator); |
---|
866 | } |
---|
867 | else { |
---|
868 | if (mIsFinalChunk && result == kEOF) { |
---|
869 | result=NS_OK; //use as much of the entity as you can get. |
---|
870 | } |
---|
871 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
872 | return result; |
---|
873 | } |
---|
874 | } |
---|
875 | // oops, we're actually looking at plain text... |
---|
876 | result = ConsumeText(aToken,aScanner); |
---|
877 | }//if |
---|
878 | return result; |
---|
879 | } |
---|
880 | |
---|
881 | |
---|
882 | /** |
---|
883 | * This method is called just after whitespace has been |
---|
884 | * consumed and we know we're at the start a whitespace run. |
---|
885 | * |
---|
886 | * @update gess 3/25/98 |
---|
887 | * @param aChar: last char read |
---|
888 | * @param aScanner: see nsScanner.h |
---|
889 | * @param anErrorCode: arg that will hold error condition |
---|
890 | * @return new token or null |
---|
891 | */ |
---|
892 | nsresult nsHTMLTokenizer::ConsumeWhitespace(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner) { |
---|
893 | // Get the whitespace character |
---|
894 | aScanner.GetChar(aChar); |
---|
895 | |
---|
896 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
897 | aToken = theAllocator->CreateTokenOfType(eToken_whitespace,eHTMLTag_whitespace); |
---|
898 | nsresult result=NS_OK; |
---|
899 | if(aToken) { |
---|
900 | result=aToken->Consume(aChar,aScanner,mFlags); |
---|
901 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
902 | } |
---|
903 | return result; |
---|
904 | } |
---|
905 | |
---|
906 | /** |
---|
907 | * This method is called just after a "<!" has been consumed |
---|
908 | * and we know we're at the start of a comment. |
---|
909 | * |
---|
910 | * @update gess 3/25/98 |
---|
911 | * @param aChar: last char read |
---|
912 | * @param aScanner: see nsScanner.h |
---|
913 | * @param anErrorCode: arg that will hold error condition |
---|
914 | * @return new token or null |
---|
915 | */ |
---|
916 | nsresult nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ |
---|
917 | // Get the "!" |
---|
918 | aScanner.GetChar(aChar); |
---|
919 | |
---|
920 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
921 | aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment); |
---|
922 | nsresult result=NS_OK; |
---|
923 | if(aToken) { |
---|
924 | result=aToken->Consume(aChar,aScanner,mFlags); |
---|
925 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
926 | } |
---|
927 | return result; |
---|
928 | } |
---|
929 | |
---|
930 | /** |
---|
931 | * This method is called just after a known text char has |
---|
932 | * been consumed and we should read a text run. |
---|
933 | * |
---|
934 | * @update gess 3/25/98 |
---|
935 | * @param aChar: last char read |
---|
936 | * @param aScanner: see nsScanner.h |
---|
937 | * @param anErrorCode: arg that will hold error condition |
---|
938 | * @return new token or null |
---|
939 | */ |
---|
940 | nsresult nsHTMLTokenizer::ConsumeText(CToken*& aToken,nsScanner& aScanner){ |
---|
941 | nsresult result=NS_OK; |
---|
942 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
943 | CTextToken* theToken = (CTextToken*)theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); |
---|
944 | if(theToken) { |
---|
945 | PRUnichar ch=0; |
---|
946 | result=theToken->Consume(ch,aScanner,mFlags); |
---|
947 | if(NS_FAILED(result)) { |
---|
948 | if(0==theToken->GetTextLength()){ |
---|
949 | IF_FREE(aToken, mTokenAllocator); |
---|
950 | aToken = nsnull; |
---|
951 | } |
---|
952 | else result=NS_OK; |
---|
953 | } |
---|
954 | aToken = theToken; |
---|
955 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
956 | } |
---|
957 | return result; |
---|
958 | } |
---|
959 | |
---|
960 | /** |
---|
961 | * This method is called just after a "<!" has been consumed. |
---|
962 | * NOTE: Here we might consume DOCTYPE and "special" markups. |
---|
963 | * |
---|
964 | * |
---|
965 | * @update harishd 09/02/99 |
---|
966 | * @param aChar: last char read |
---|
967 | * @param aScanner: see nsScanner.h |
---|
968 | * @param anErrorCode: arg that will hold error condition |
---|
969 | * @return new token or null |
---|
970 | */ |
---|
971 | nsresult nsHTMLTokenizer::ConsumeSpecialMarkup(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ |
---|
972 | |
---|
973 | // Get the "!" |
---|
974 | aScanner.GetChar(aChar); |
---|
975 | |
---|
976 | nsresult result=NS_OK; |
---|
977 | nsAutoString theBufCopy; |
---|
978 | aScanner.Peek(theBufCopy, 20); |
---|
979 | ToUpperCase(theBufCopy); |
---|
980 | PRInt32 theIndex=theBufCopy.Find("DOCTYPE"); |
---|
981 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
982 | |
---|
983 | if(theIndex==kNotFound) { |
---|
984 | if('['==theBufCopy.CharAt(0)) { |
---|
985 | aToken = theAllocator->CreateTokenOfType(eToken_cdatasection,eHTMLTag_comment); |
---|
986 | } else if (StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ELEMENT")) || |
---|
987 | StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ATTLIST")) || |
---|
988 | StringBeginsWith(theBufCopy, NS_LITERAL_STRING("ENTITY")) || |
---|
989 | StringBeginsWith(theBufCopy, NS_LITERAL_STRING("NOTATION"))) { |
---|
990 | aToken = theAllocator->CreateTokenOfType(eToken_markupDecl,eHTMLTag_markupDecl); |
---|
991 | } else { |
---|
992 | aToken = theAllocator->CreateTokenOfType(eToken_comment,eHTMLTag_comment); |
---|
993 | } |
---|
994 | } |
---|
995 | else |
---|
996 | aToken = theAllocator->CreateTokenOfType(eToken_doctypeDecl,eHTMLTag_doctypeDecl); |
---|
997 | |
---|
998 | if(aToken) { |
---|
999 | result=aToken->Consume(aChar,aScanner,mFlags); |
---|
1000 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
1001 | } |
---|
1002 | return result; |
---|
1003 | } |
---|
1004 | |
---|
1005 | /** |
---|
1006 | * This method is called just after a newline has been consumed. |
---|
1007 | * |
---|
1008 | * @update gess 3/25/98 |
---|
1009 | * @param aChar: last char read |
---|
1010 | * @param aScanner: see nsScanner.h |
---|
1011 | * @param aToken is the newly created newline token that is parsing |
---|
1012 | * @return error code |
---|
1013 | */ |
---|
1014 | nsresult nsHTMLTokenizer::ConsumeNewline(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ |
---|
1015 | // Get the newline character |
---|
1016 | aScanner.GetChar(aChar); |
---|
1017 | |
---|
1018 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
1019 | aToken=theAllocator->CreateTokenOfType(eToken_newline,eHTMLTag_newline); |
---|
1020 | nsresult result=NS_OK; |
---|
1021 | if(aToken) { |
---|
1022 | result=aToken->Consume(aChar,aScanner,mFlags); |
---|
1023 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
1024 | } |
---|
1025 | return result; |
---|
1026 | } |
---|
1027 | |
---|
1028 | |
---|
1029 | /** |
---|
1030 | * This method is called just after a ? has been consumed. |
---|
1031 | * |
---|
1032 | * @update gess 3/25/98 |
---|
1033 | * @param aChar: last char read |
---|
1034 | * @param aScanner: see nsScanner.h |
---|
1035 | * @param aToken is the newly created newline token that is parsing |
---|
1036 | * @return error code |
---|
1037 | */ |
---|
1038 | nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,CToken*& aToken,nsScanner& aScanner){ |
---|
1039 | |
---|
1040 | // Get the "?" |
---|
1041 | aScanner.GetChar(aChar); |
---|
1042 | |
---|
1043 | nsTokenAllocator* theAllocator=this->GetTokenAllocator(); |
---|
1044 | aToken=theAllocator->CreateTokenOfType(eToken_instruction,eHTMLTag_unknown); |
---|
1045 | nsresult result=NS_OK; |
---|
1046 | if(aToken) { |
---|
1047 | result=aToken->Consume(aChar,aScanner,mFlags); |
---|
1048 | AddToken(aToken,result,&mTokenDeque,theAllocator); |
---|
1049 | } |
---|
1050 | return result; |
---|
1051 | } |
---|
1052 | |
---|
1053 | /** |
---|
1054 | * This method keeps a copy of contents within the start token. |
---|
1055 | * The stored content could later be used in displaying TEXTAREA, |
---|
1056 | * and also in view source. |
---|
1057 | * |
---|
1058 | * @update harishd 11/09/99 |
---|
1059 | * @param aStartToken: The token whose trailing contents are to be recorded |
---|
1060 | * @param aScanner: see nsScanner.h |
---|
1061 | * |
---|
1062 | */ |
---|
1063 | |
---|
1064 | void nsHTMLTokenizer::PreserveToken(CStartToken* aStartToken, |
---|
1065 | nsScanner& aScanner, |
---|
1066 | nsScannerIterator aOrigin) { |
---|
1067 | if(aStartToken) { |
---|
1068 | nsScannerIterator theCurrentPosition; |
---|
1069 | aScanner.CurrentPosition(theCurrentPosition); |
---|
1070 | |
---|
1071 | nsString& trailingContent = aStartToken->mTrailingContent; |
---|
1072 | PRUint32 oldLength = trailingContent.Length(); |
---|
1073 | trailingContent.SetLength(oldLength + Distance(aOrigin, theCurrentPosition)); |
---|
1074 | |
---|
1075 | nsWritingIterator<PRUnichar> beginWriting; |
---|
1076 | trailingContent.BeginWriting(beginWriting); |
---|
1077 | beginWriting.advance(oldLength); |
---|
1078 | |
---|
1079 | copy_string( aOrigin, theCurrentPosition, beginWriting ); |
---|
1080 | } |
---|
1081 | } |
---|