1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
---|
2 | /* ***** BEGIN LICENSE BLOCK ***** |
---|
3 | * Version: NPL 1.1/GPL 2.0/LGPL 2.1 |
---|
4 | * |
---|
5 | * The contents of this file are subject to the Netscape Public License |
---|
6 | * Version 1.1 (the "License"); you may not use this file except in |
---|
7 | * compliance with the License. You may obtain a copy of the License at |
---|
8 | * http://www.mozilla.org/NPL/ |
---|
9 | * |
---|
10 | * Software distributed under the License is distributed on an "AS IS" basis, |
---|
11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
---|
12 | * for the specific language governing rights and limitations under the |
---|
13 | * License. |
---|
14 | * |
---|
15 | * The Original Code is mozilla.org code. |
---|
16 | * |
---|
17 | * The Initial Developer of the Original Code is |
---|
18 | * Netscape Communications Corporation. |
---|
19 | * Portions created by the Initial Developer are Copyright (C) 1998 |
---|
20 | * the Initial Developer. All Rights Reserved. |
---|
21 | * |
---|
22 | * Contributor(s): |
---|
23 | * |
---|
24 | * |
---|
25 | * Alternatively, the contents of this file may be used under the terms of |
---|
26 | * either the GNU General Public License Version 2 or later (the "GPL"), or |
---|
27 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
---|
28 | * in which case the provisions of the GPL or the LGPL are applicable instead |
---|
29 | * of those above. If you wish to allow use of your version of this file only |
---|
30 | * under the terms of either the GPL or the LGPL, and not to allow others to |
---|
31 | * use your version of this file under the terms of the NPL, indicate your |
---|
32 | * decision by deleting the provisions above and replace them with the notice |
---|
33 | * and other provisions required by the GPL or the LGPL. If you do not delete |
---|
34 | * the provisions above, a recipient may use your version of this file under |
---|
35 | * the terms of any one of the NPL, the GPL or the LGPL. |
---|
36 | * |
---|
37 | * ***** END LICENSE BLOCK ***** */ |
---|
38 | |
---|
39 | //#define __INCREMENTAL 1 |
---|
40 | |
---|
41 | #include "nsScanner.h" |
---|
42 | #include "nsDebug.h" |
---|
43 | #include "nsIServiceManager.h" |
---|
44 | #include "nsICharsetConverterManager.h" |
---|
45 | #include "nsICharsetAlias.h" |
---|
46 | #include "nsReadableUtils.h" |
---|
47 | #include "nsIInputStream.h" |
---|
48 | #include "nsILocalFile.h" |
---|
49 | #include "nsNetUtil.h" |
---|
50 | #include "nsUTF8Utils.h" // for LossyConvertEncoding |
---|
51 | #include "nsCRT.h" |
---|
52 | #include "nsParser.h" |
---|
53 | |
---|
54 | static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID); |
---|
55 | |
---|
56 | nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) : |
---|
57 | mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set |
---|
58 | { |
---|
59 | // Build filter that will be used to filter out characters with |
---|
60 | // bits that none of the terminal chars have. This works very well |
---|
61 | // because terminal chars often have only the last 4-6 bits set and |
---|
62 | // normal ascii letters have bit 7 set. Other letters have even higher |
---|
63 | // bits set. |
---|
64 | |
---|
65 | // Calculate filter |
---|
66 | const PRUnichar *current = aTerminateChars; |
---|
67 | PRUnichar terminalChar = *current; |
---|
68 | while (terminalChar) { |
---|
69 | mFilter &= ~terminalChar; |
---|
70 | ++current; |
---|
71 | terminalChar = *current; |
---|
72 | } |
---|
73 | } |
---|
74 | |
---|
// Class ID used by SetDocumentCharset() to obtain the charset converter
// manager service.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);

// Canned HTML for a missing document. In the visible part of this file it is
// referenced only from the disabled (#if 0) debug code in FillBuffer().
static const char kBadHTMLText[] ="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
// Diagnostic text for callers that pass an unordered set string.
// NOTE(review): no use of this constant is visible in this part of the file.
static const char kUnorderedStringError[] = "String argument must be ordered. Don't you read API's?";

// Number of bytes FillBuffer() requests from the input stream per call.
// Defining __INCREMENTAL (see the commented-out #define at the top of the
// file) shrinks it to a single byte.
#ifdef __INCREMENTAL
const int kBufsize=1;
#else
const int kBufsize=64;
#endif

MOZ_DECL_CTOR_COUNTER(nsScanner)
---|
87 | |
---|
88 | /** |
---|
89 | * Use this constructor if you want i/o to be based on |
---|
90 | * a single string you hand in during construction. |
---|
91 | * This short cut was added for Javascript. |
---|
92 | * |
---|
93 | * @update gess 5/12/98 |
---|
94 | * @param aMode represents the parser mode (nav, other) |
---|
95 | * @return |
---|
96 | */ |
---|
97 | nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, |
---|
98 | PRInt32 aSource) |
---|
99 | : mParser(nsnull) |
---|
100 | { |
---|
101 | MOZ_COUNT_CTOR(nsScanner); |
---|
102 | |
---|
103 | mTotalRead = anHTMLString.Length(); |
---|
104 | mSlidingBuffer = nsnull; |
---|
105 | mCountRemaining = 0; |
---|
106 | mFirstNonWhitespacePosition = -1; |
---|
107 | AppendToBuffer(anHTMLString); |
---|
108 | mSlidingBuffer->BeginReading(mCurrentPosition); |
---|
109 | mMarkPosition = mCurrentPosition; |
---|
110 | mIncremental = PR_FALSE; |
---|
111 | mUnicodeDecoder = 0; |
---|
112 | mCharsetSource = kCharsetUninitialized; |
---|
113 | SetDocumentCharset(aCharset, aSource); |
---|
114 | } |
---|
115 | |
---|
116 | /** |
---|
117 | * Use this constructor if you want i/o to be based on strings |
---|
118 | * the scanner receives. If you pass a null filename, you |
---|
119 | * can still provide data to the scanner via append. |
---|
120 | * |
---|
121 | * @update gess 5/12/98 |
---|
122 | * @param aFilename -- |
---|
123 | * @return |
---|
124 | */ |
---|
125 | nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream, |
---|
126 | const nsACString& aCharset, PRInt32 aSource) |
---|
127 | : mFilename(aFilename), mParser(nsnull) |
---|
128 | { |
---|
129 | MOZ_COUNT_CTOR(nsScanner); |
---|
130 | |
---|
131 | mSlidingBuffer = nsnull; |
---|
132 | |
---|
133 | // XXX This is a big hack. We need to initialize the iterators to something. |
---|
134 | // What matters is that mCurrentPosition == mEndPosition, so that our methods |
---|
135 | // believe that we are at EOF (see bug 182067). We null out mCurrentPosition |
---|
136 | // so that we have some hope of catching null pointer dereferences associated |
---|
137 | // with this hack. --darin |
---|
138 | memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); |
---|
139 | mMarkPosition = mCurrentPosition; |
---|
140 | mEndPosition = mCurrentPosition; |
---|
141 | |
---|
142 | mIncremental = PR_TRUE; |
---|
143 | mFirstNonWhitespacePosition = -1; |
---|
144 | mCountRemaining = 0; |
---|
145 | mTotalRead=0; |
---|
146 | |
---|
147 | if(aCreateStream) { |
---|
148 | nsCOMPtr<nsILocalFile> file; |
---|
149 | nsCOMPtr<nsIInputStream> fileStream; |
---|
150 | |
---|
151 | NS_NewLocalFile(aFilename, PR_TRUE, getter_AddRefs(file)); |
---|
152 | if (file) |
---|
153 | NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), file); |
---|
154 | |
---|
155 | } //if |
---|
156 | mUnicodeDecoder = 0; |
---|
157 | mCharsetSource = kCharsetUninitialized; |
---|
158 | SetDocumentCharset(aCharset, aSource); |
---|
159 | } |
---|
160 | |
---|
161 | /** |
---|
162 | * Use this constructor if you want i/o to be stream based. |
---|
163 | * |
---|
164 | * @update gess 5/12/98 |
---|
165 | * @param aStream -- |
---|
166 | * @param assumeOwnership -- |
---|
167 | * @param aFilename -- |
---|
168 | * @return |
---|
169 | */ |
---|
170 | nsScanner::nsScanner(const nsAString& aFilename, nsIInputStream* aStream, |
---|
171 | const nsACString& aCharset, PRInt32 aSource) |
---|
172 | : mFilename(aFilename), mParser(nsnull) |
---|
173 | { |
---|
174 | MOZ_COUNT_CTOR(nsScanner); |
---|
175 | |
---|
176 | mSlidingBuffer = nsnull; |
---|
177 | |
---|
178 | // XXX This is a big hack. We need to initialize the iterators to something. |
---|
179 | // What matters is that mCurrentPosition == mEndPosition, so that our methods |
---|
180 | // believe that we are at EOF (see bug 182067). We null out mCurrentPosition |
---|
181 | // so that we have some hope of catching null pointer dereferences associated |
---|
182 | // with this hack. --darin |
---|
183 | memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); |
---|
184 | mMarkPosition = mCurrentPosition; |
---|
185 | mEndPosition = mCurrentPosition; |
---|
186 | |
---|
187 | mIncremental = PR_FALSE; |
---|
188 | mFirstNonWhitespacePosition = -1; |
---|
189 | mCountRemaining = 0; |
---|
190 | mTotalRead=0; |
---|
191 | mInputStream=aStream; |
---|
192 | mUnicodeDecoder = 0; |
---|
193 | mCharsetSource = kCharsetUninitialized; |
---|
194 | SetDocumentCharset(aCharset, aSource); |
---|
195 | } |
---|
196 | |
---|
197 | |
---|
198 | nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource) { |
---|
199 | |
---|
200 | nsresult res = NS_OK; |
---|
201 | |
---|
202 | if( aSource < mCharsetSource) // priority is lower the the current one , just |
---|
203 | return res; |
---|
204 | |
---|
205 | nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res)); |
---|
206 | NS_ASSERTION( nsnull != calias, "cannot find charset alias"); |
---|
207 | if( NS_SUCCEEDED(res) && (nsnull != calias)) |
---|
208 | { |
---|
209 | PRBool same = PR_FALSE; |
---|
210 | res = calias->Equals(aCharset, mCharset, &same); |
---|
211 | if(NS_SUCCEEDED(res) && same) |
---|
212 | { |
---|
213 | return NS_OK; // no difference, don't change it |
---|
214 | } |
---|
215 | // different, need to change it |
---|
216 | nsCAutoString charsetName; |
---|
217 | res = calias->GetPreferred(aCharset, charsetName); |
---|
218 | |
---|
219 | if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) ) |
---|
220 | { |
---|
221 | // failed - unknown alias , fallback to ISO-8859-1 |
---|
222 | charsetName.Assign(NS_LITERAL_CSTRING("ISO-8859-1")); |
---|
223 | } |
---|
224 | mCharset = charsetName; |
---|
225 | mCharsetSource = aSource; |
---|
226 | |
---|
227 | nsCOMPtr<nsICharsetConverterManager> ccm = |
---|
228 | do_GetService(kCharsetConverterManagerCID, &res); |
---|
229 | if(NS_SUCCEEDED(res) && (nsnull != ccm)) |
---|
230 | { |
---|
231 | nsIUnicodeDecoder * decoder = nsnull; |
---|
232 | res = ccm->GetUnicodeDecoderRaw(mCharset.get(), &decoder); |
---|
233 | if(NS_SUCCEEDED(res) && (nsnull != decoder)) |
---|
234 | { |
---|
235 | NS_IF_RELEASE(mUnicodeDecoder); |
---|
236 | |
---|
237 | mUnicodeDecoder = decoder; |
---|
238 | } |
---|
239 | } |
---|
240 | } |
---|
241 | return res; |
---|
242 | } |
---|
243 | |
---|
244 | |
---|
245 | /** |
---|
246 | * default destructor |
---|
247 | * |
---|
248 | * @update gess 3/25/98 |
---|
249 | * @param |
---|
250 | * @return |
---|
251 | */ |
---|
252 | nsScanner::~nsScanner() { |
---|
253 | |
---|
254 | if (mSlidingBuffer) { |
---|
255 | delete mSlidingBuffer; |
---|
256 | } |
---|
257 | |
---|
258 | MOZ_COUNT_DTOR(nsScanner); |
---|
259 | |
---|
260 | if(mInputStream) { |
---|
261 | mInputStream->Close(); |
---|
262 | mInputStream = 0; |
---|
263 | } |
---|
264 | |
---|
265 | NS_IF_RELEASE(mUnicodeDecoder); |
---|
266 | } |
---|
267 | |
---|
268 | /** |
---|
269 | * Resets current offset position of input stream to marked position. |
---|
270 | * This allows us to back up to this point if the need should arise, |
---|
271 | * such as when tokenization gets interrupted. |
---|
272 | * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! |
---|
273 | * |
---|
274 | * @update gess 5/12/98 |
---|
275 | * @param |
---|
276 | * @return |
---|
277 | */ |
---|
278 | void nsScanner::RewindToMark(void){ |
---|
279 | mCountRemaining += (Distance(mMarkPosition, mCurrentPosition)); |
---|
280 | mCurrentPosition = mMarkPosition; |
---|
281 | } |
---|
282 | |
---|
283 | |
---|
284 | /** |
---|
285 | * Records current offset position in input stream. This allows us |
---|
286 | * to back up to this point if the need should arise, such as when |
---|
287 | * tokenization gets interrupted. |
---|
288 | * |
---|
289 | * @update gess 7/29/98 |
---|
290 | * @param |
---|
291 | * @return |
---|
292 | */ |
---|
293 | void nsScanner::Mark() { |
---|
294 | if (mSlidingBuffer) { |
---|
295 | mSlidingBuffer->DiscardPrefix(mCurrentPosition); |
---|
296 | mSlidingBuffer->BeginReading(mCurrentPosition); |
---|
297 | mMarkPosition = mCurrentPosition; |
---|
298 | } |
---|
299 | } |
---|
300 | |
---|
301 | |
---|
302 | /** |
---|
303 | * Insert data to our underlying input buffer as |
---|
304 | * if it were read from an input stream. |
---|
305 | * |
---|
306 | * @update harishd 01/12/99 |
---|
307 | * @return error code |
---|
308 | */ |
---|
309 | PRBool nsScanner::UngetReadable(const nsAString& aBuffer) { |
---|
310 | |
---|
311 | mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition); |
---|
312 | mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators |
---|
313 | mSlidingBuffer->EndReading(mEndPosition); |
---|
314 | |
---|
315 | PRUint32 length = aBuffer.Length(); |
---|
316 | mCountRemaining += length; // Ref. bug 117441 |
---|
317 | mTotalRead += length; |
---|
318 | return PR_TRUE; |
---|
319 | } |
---|
320 | |
---|
321 | /** |
---|
322 | * Append data to our underlying input buffer as |
---|
323 | * if it were read from an input stream. |
---|
324 | * |
---|
325 | * @update gess4/3/98 |
---|
326 | * @return error code |
---|
327 | */ |
---|
328 | nsresult nsScanner::Append(const nsAString& aBuffer) { |
---|
329 | |
---|
330 | mTotalRead += aBuffer.Length(); |
---|
331 | AppendToBuffer(aBuffer); |
---|
332 | return NS_OK; |
---|
333 | } |
---|
334 | |
---|
335 | /** |
---|
336 | * |
---|
337 | * |
---|
338 | * @update gess 5/21/98 |
---|
339 | * @param |
---|
340 | * @return |
---|
341 | */ |
---|
342 | nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen, |
---|
343 | nsIRequest *aRequest) |
---|
344 | { |
---|
345 | nsresult res=NS_OK; |
---|
346 | PRUnichar *unichars, *start; |
---|
347 | if(mUnicodeDecoder) { |
---|
348 | PRInt32 unicharBufLen = 0; |
---|
349 | mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen); |
---|
350 | nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1); |
---|
351 | NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY); |
---|
352 | start = unichars = buffer->DataStart(); |
---|
353 | |
---|
354 | PRInt32 totalChars = 0; |
---|
355 | PRInt32 unicharLength = unicharBufLen; |
---|
356 | do { |
---|
357 | PRInt32 srcLength = aLen; |
---|
358 | res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength); |
---|
359 | |
---|
360 | totalChars += unicharLength; |
---|
361 | // Continuation of failure case |
---|
362 | if(NS_FAILED(res)) { |
---|
363 | // if we failed, we consume one byte, replace it with U+FFFD |
---|
364 | // and try the conversion again. |
---|
365 | |
---|
366 | // This is only needed because some decoders don't follow the |
---|
367 | // nsIUnicodeDecoder contract: they return a failure when *aDestLength |
---|
368 | // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177 |
---|
369 | if ((unichars + unicharLength) >= buffer->DataEnd()) { |
---|
370 | NS_ERROR("Unexpected end of destination buffer"); |
---|
371 | break; |
---|
372 | } |
---|
373 | |
---|
374 | unichars[unicharLength++] = (PRUnichar)0xFFFD; |
---|
375 | unichars = unichars + unicharLength; |
---|
376 | unicharLength = unicharBufLen - (++totalChars); |
---|
377 | |
---|
378 | mUnicodeDecoder->Reset(); |
---|
379 | |
---|
380 | if(((PRUint32) (srcLength + 1)) > aLen) { |
---|
381 | srcLength = aLen; |
---|
382 | } |
---|
383 | else { |
---|
384 | ++srcLength; |
---|
385 | } |
---|
386 | |
---|
387 | aBuffer += srcLength; |
---|
388 | aLen -= srcLength; |
---|
389 | } |
---|
390 | } while (NS_FAILED(res) && (aLen > 0)); |
---|
391 | |
---|
392 | buffer->SetDataLength(totalChars); |
---|
393 | AppendToBuffer(buffer, aRequest); |
---|
394 | mTotalRead += totalChars; |
---|
395 | |
---|
396 | // Don't propagate return code of unicode decoder |
---|
397 | // since it doesn't reflect on our success or failure |
---|
398 | // - Ref. bug 87110 |
---|
399 | res = NS_OK; |
---|
400 | } |
---|
401 | else { |
---|
402 | AppendASCIItoBuffer(aBuffer, aLen, aRequest); |
---|
403 | mTotalRead+=aLen; |
---|
404 | } |
---|
405 | |
---|
406 | return res; |
---|
407 | } |
---|
408 | |
---|
409 | |
---|
410 | /** |
---|
411 | * Grab data from underlying stream. |
---|
412 | * |
---|
413 | * @update gess4/3/98 |
---|
414 | * @return error code |
---|
415 | */ |
---|
416 | nsresult nsScanner::FillBuffer(void) { |
---|
417 | nsresult result=NS_OK; |
---|
418 | |
---|
419 | if(!mInputStream) { |
---|
420 | #if 0 |
---|
421 | //This is DEBUG code!!!!!! XXX DEBUG XXX |
---|
422 | //If you're here, it means someone tried to load a |
---|
423 | //non-existent document. So as a favor, we emit a |
---|
424 | //little bit of HTML explaining the error. |
---|
425 | if(0==mTotalRead) { |
---|
426 | mBuffer.Append((const char*)kBadHTMLText); |
---|
427 | mBuffer.Append(mFilename); |
---|
428 | mTotalRead+=mBuffer.Length(); |
---|
429 | } |
---|
430 | else |
---|
431 | #endif |
---|
432 | result=kEOF; |
---|
433 | } |
---|
434 | else { |
---|
435 | PRUint32 numread=0; |
---|
436 | char buf[kBufsize+1]; |
---|
437 | buf[kBufsize]=0; |
---|
438 | |
---|
439 | // XXX use ReadSegments to avoid extra buffer copy? --darin |
---|
440 | |
---|
441 | result = mInputStream->Read(buf, kBufsize, &numread); |
---|
442 | if (0 == numread) { |
---|
443 | return kEOF; |
---|
444 | } |
---|
445 | |
---|
446 | if((0<numread) && (0==result)) { |
---|
447 | AppendASCIItoBuffer(buf, numread, nsnull); |
---|
448 | } |
---|
449 | mTotalRead+=numread; |
---|
450 | } |
---|
451 | |
---|
452 | return result; |
---|
453 | } |
---|
454 | |
---|
455 | /** |
---|
456 | * determine if the scanner has reached EOF |
---|
457 | * |
---|
458 | * @update gess 5/12/98 |
---|
459 | * @param |
---|
460 | * @return 0=!eof 1=eof |
---|
461 | */ |
---|
462 | nsresult nsScanner::Eof() { |
---|
463 | nsresult theError=NS_OK; |
---|
464 | |
---|
465 | if (!mSlidingBuffer) { |
---|
466 | return kEOF; |
---|
467 | } |
---|
468 | |
---|
469 | theError=FillBuffer(); |
---|
470 | |
---|
471 | if(NS_OK==theError) { |
---|
472 | if (0==(PRUint32)mSlidingBuffer->Length()) { |
---|
473 | return kEOF; |
---|
474 | } |
---|
475 | } |
---|
476 | |
---|
477 | return theError; |
---|
478 | } |
---|
479 | |
---|
480 | /** |
---|
481 | * retrieve next char from scanners internal input stream |
---|
482 | * |
---|
483 | * @update gess 3/25/98 |
---|
484 | * @param |
---|
485 | * @return error code reflecting read status |
---|
486 | */ |
---|
487 | nsresult nsScanner::GetChar(PRUnichar& aChar) { |
---|
488 | nsresult result=NS_OK; |
---|
489 | aChar=0; |
---|
490 | |
---|
491 | if (!mSlidingBuffer) { |
---|
492 | return kEOF; |
---|
493 | } |
---|
494 | |
---|
495 | if (mCurrentPosition == mEndPosition) { |
---|
496 | result=Eof(); |
---|
497 | } |
---|
498 | |
---|
499 | if(NS_OK == result){ |
---|
500 | aChar=*mCurrentPosition++; |
---|
501 | --mCountRemaining; |
---|
502 | } |
---|
503 | return result; |
---|
504 | } |
---|
505 | |
---|
506 | |
---|
507 | /** |
---|
508 | * peek ahead to consume next char from scanner's internal |
---|
509 | * input buffer |
---|
510 | * |
---|
511 | * @update gess 3/25/98 |
---|
512 | * @param |
---|
513 | * @return |
---|
514 | */ |
---|
515 | nsresult nsScanner::Peek(PRUnichar& aChar, PRUint32 aOffset) { |
---|
516 | nsresult result=NS_OK; |
---|
517 | aChar=0; |
---|
518 | |
---|
519 | if (!mSlidingBuffer) { |
---|
520 | return kEOF; |
---|
521 | } |
---|
522 | |
---|
523 | if (mCurrentPosition == mEndPosition) { |
---|
524 | result=Eof(); |
---|
525 | } |
---|
526 | |
---|
527 | if(NS_OK == result){ |
---|
528 | if (aOffset) { |
---|
529 | while ((NS_OK == result) && (mCountRemaining <= aOffset)) { |
---|
530 | result = Eof(); |
---|
531 | } |
---|
532 | |
---|
533 | if (NS_OK == result) { |
---|
534 | nsScannerIterator pos = mCurrentPosition; |
---|
535 | pos.advance(aOffset); |
---|
536 | aChar=*pos; |
---|
537 | } |
---|
538 | } |
---|
539 | else { |
---|
540 | aChar=*mCurrentPosition; |
---|
541 | } |
---|
542 | } |
---|
543 | |
---|
544 | return result; |
---|
545 | } |
---|
546 | |
---|
547 | nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset) |
---|
548 | { |
---|
549 | if (!mSlidingBuffer) { |
---|
550 | return kEOF; |
---|
551 | } |
---|
552 | |
---|
553 | if (mCurrentPosition == mEndPosition) { |
---|
554 | return Eof(); |
---|
555 | } |
---|
556 | |
---|
557 | nsScannerIterator start, end; |
---|
558 | |
---|
559 | start = mCurrentPosition; |
---|
560 | |
---|
561 | if (mCountRemaining <= aOffset) { |
---|
562 | return kEOF; |
---|
563 | } |
---|
564 | |
---|
565 | if (aOffset > 0) { |
---|
566 | start.advance(aOffset); |
---|
567 | } |
---|
568 | |
---|
569 | if (mCountRemaining < PRUint32(aNumChars + aOffset)) { |
---|
570 | end = mEndPosition; |
---|
571 | } |
---|
572 | else { |
---|
573 | end = start; |
---|
574 | end.advance(aNumChars); |
---|
575 | } |
---|
576 | |
---|
577 | CopyUnicodeTo(start, end, aStr); |
---|
578 | |
---|
579 | return NS_OK; |
---|
580 | } |
---|
581 | |
---|
582 | |
---|
583 | /** |
---|
584 | * Skip whitespace on scanner input stream |
---|
585 | * |
---|
586 | * @update gess 3/25/98 |
---|
587 | * @param |
---|
588 | * @return error status |
---|
589 | */ |
---|
590 | nsresult nsScanner::SkipWhitespace(PRInt32& aNewlinesSkipped) { |
---|
591 | |
---|
592 | if (!mSlidingBuffer) { |
---|
593 | return kEOF; |
---|
594 | } |
---|
595 | |
---|
596 | PRUnichar theChar = 0; |
---|
597 | nsresult result = Peek(theChar); |
---|
598 | |
---|
599 | if (result == kEOF) { |
---|
600 | // XXX why wouldn't Eof() return kEOF?? --darin |
---|
601 | return Eof(); |
---|
602 | } |
---|
603 | |
---|
604 | nsScannerIterator current = mCurrentPosition; |
---|
605 | PRBool done = PR_FALSE; |
---|
606 | PRBool skipped = PR_FALSE; |
---|
607 | |
---|
608 | while (!done && current != mEndPosition) { |
---|
609 | switch(theChar) { |
---|
610 | case '\n': |
---|
611 | case '\r': ++aNewlinesSkipped; |
---|
612 | case ' ' : |
---|
613 | case '\b': |
---|
614 | case '\t': |
---|
615 | { |
---|
616 | skipped = PR_TRUE; |
---|
617 | PRUnichar thePrevChar = theChar; |
---|
618 | theChar = (++current != mEndPosition) ? *current : '\0'; |
---|
619 | if ((thePrevChar == '\r' && theChar == '\n') || |
---|
620 | (thePrevChar == '\n' && theChar == '\r')) { |
---|
621 | theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF |
---|
622 | } |
---|
623 | } |
---|
624 | break; |
---|
625 | default: |
---|
626 | done = PR_TRUE; |
---|
627 | break; |
---|
628 | } |
---|
629 | } |
---|
630 | |
---|
631 | if (skipped) { |
---|
632 | SetPosition(current); |
---|
633 | if (current == mEndPosition) { |
---|
634 | result = Eof(); |
---|
635 | } |
---|
636 | } |
---|
637 | |
---|
638 | return result; |
---|
639 | } |
---|
640 | |
---|
641 | /** |
---|
642 | * Skip over chars as long as they equal given char |
---|
643 | * |
---|
644 | * @update gess 3/25/98 |
---|
645 | * @param |
---|
646 | * @return error code |
---|
647 | */ |
---|
648 | nsresult nsScanner::SkipOver(PRUnichar aSkipChar){ |
---|
649 | |
---|
650 | if (!mSlidingBuffer) { |
---|
651 | return kEOF; |
---|
652 | } |
---|
653 | |
---|
654 | PRUnichar ch=0; |
---|
655 | nsresult result=NS_OK; |
---|
656 | |
---|
657 | while(NS_OK==result) { |
---|
658 | result=Peek(ch); |
---|
659 | if(NS_OK == result) { |
---|
660 | if(ch!=aSkipChar) { |
---|
661 | break; |
---|
662 | } |
---|
663 | GetChar(ch); |
---|
664 | } |
---|
665 | else break; |
---|
666 | } //while |
---|
667 | return result; |
---|
668 | |
---|
669 | } |
---|
670 | |
---|
671 | /** |
---|
672 | * Skip over chars as long as they're in aSkipSet |
---|
673 | * |
---|
674 | * @update gess 3/25/98 |
---|
675 | * @param aSkipSet is an ordered string. |
---|
676 | * @return error code |
---|
677 | */ |
---|
678 | nsresult nsScanner::SkipOver(nsString& aSkipSet){ |
---|
679 | |
---|
680 | if (!mSlidingBuffer) { |
---|
681 | return kEOF; |
---|
682 | } |
---|
683 | |
---|
684 | PRUnichar theChar=0; |
---|
685 | nsresult result=NS_OK; |
---|
686 | |
---|
687 | while(NS_OK==result) { |
---|
688 | result=Peek(theChar); |
---|
689 | if(NS_OK == result) { |
---|
690 | PRInt32 pos=aSkipSet.FindChar(theChar); |
---|
691 | if(kNotFound==pos) { |
---|
692 | break; |
---|
693 | } |
---|
694 | GetChar(theChar); |
---|
695 | } |
---|
696 | else break; |
---|
697 | } //while |
---|
698 | return result; |
---|
699 | |
---|
700 | } |
---|
701 | |
---|
702 | |
---|
703 | /** |
---|
704 | * Skip over chars until they're in aValidSet |
---|
705 | * |
---|
706 | * @update gess 3/25/98 |
---|
707 | * @param aValid set is an ordered string that |
---|
708 | * contains chars you're looking for |
---|
709 | * @return error code |
---|
710 | */ |
---|
711 | nsresult nsScanner::SkipTo(nsString& aValidSet){ |
---|
712 | if (!mSlidingBuffer) { |
---|
713 | return kEOF; |
---|
714 | } |
---|
715 | |
---|
716 | PRUnichar ch=0; |
---|
717 | nsresult result=NS_OK; |
---|
718 | |
---|
719 | while(NS_OK==result) { |
---|
720 | result=Peek(ch); |
---|
721 | if(NS_OK == result) { |
---|
722 | PRInt32 pos=aValidSet.FindChar(ch); |
---|
723 | if(kNotFound!=pos) { |
---|
724 | break; |
---|
725 | } |
---|
726 | GetChar(ch); |
---|
727 | } |
---|
728 | else break; |
---|
729 | } //while |
---|
730 | return result; |
---|
731 | } |
---|
732 | |
---|
#if 0
// Disabled debugging hooks.  Each one looks for a NUL character that is not
// the last character of the string; the body for that case is empty, so this
// is merely a convenient place to put a breakpoint.
void DoErrTest(nsString& aString) {
  const PRInt32 nulIndex = aString.FindChar(0);
  if ((kNotFound < nulIndex) && (aString.Length() - 1 != nulIndex)) {
  }
}

void DoErrTest(nsCString& aString) {
  const PRInt32 nulIndex = aString.FindChar(0);
  if ((kNotFound < nulIndex) && (aString.Length() - 1 != nulIndex)) {
  }
}
#endif
---|
750 | |
---|
/**
 *  Intended to skip over chars as long as they're in aValidSet.
 *
 *  NOT IMPLEMENTED: the body only reports the missing implementation through
 *  NS_NOTYETIMPLEMENTED; the scanner position is never changed.
 *
 *  @update  gess 3/25/98
 *  @param   aValidSet is an ordered string containing the
 *           characters you want to skip (currently unused)
 *  @return  always NS_OK
 */
nsresult nsScanner::SkipPast(nsString& aValidSet){
  NS_NOTYETIMPLEMENTED("Error: SkipPast not yet implemented.");
  return NS_OK;
}
---|
763 | |
---|
764 | /** |
---|
765 | * Consume characters until you did not find the terminal char |
---|
766 | * |
---|
767 | * @update gess 3/25/98 |
---|
768 | * @param aString - receives new data from stream |
---|
769 | * @param aIgnore - If set ignores ':','-','_','.' |
---|
770 | * @return error code |
---|
771 | */ |
---|
772 | nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) { |
---|
773 | |
---|
774 | if (!mSlidingBuffer) { |
---|
775 | return kEOF; |
---|
776 | } |
---|
777 | |
---|
778 | PRUnichar theChar=0; |
---|
779 | nsresult result=Peek(theChar); |
---|
780 | nsScannerIterator current, end; |
---|
781 | PRBool found=PR_FALSE; |
---|
782 | |
---|
783 | current = mCurrentPosition; |
---|
784 | end = mEndPosition; |
---|
785 | |
---|
786 | while(current != end) { |
---|
787 | |
---|
788 | theChar=*current; |
---|
789 | found=PR_FALSE; |
---|
790 | switch(theChar) { |
---|
791 | case ':': |
---|
792 | case '_': |
---|
793 | case '-': |
---|
794 | case '.': |
---|
795 | found=allowPunct; |
---|
796 | break; |
---|
797 | default: |
---|
798 | found = ('a'<=theChar && theChar<='z') || |
---|
799 | ('A'<=theChar && theChar<='Z') || |
---|
800 | ('0'<=theChar && theChar<='9'); |
---|
801 | break; |
---|
802 | } |
---|
803 | |
---|
804 | if(!found) { |
---|
805 | // If the current character isn't a valid character for |
---|
806 | // the identifier, we're done. Copy the results into |
---|
807 | // the string passed in. |
---|
808 | CopyUnicodeTo(mCurrentPosition, current, aString); |
---|
809 | break; |
---|
810 | } |
---|
811 | ++current; |
---|
812 | } |
---|
813 | |
---|
814 | // Drop NULs on the floor since nobody really likes them. |
---|
815 | while (current != end && !*current) { |
---|
816 | ++current; |
---|
817 | } |
---|
818 | |
---|
819 | SetPosition(current); |
---|
820 | if (current == end) { |
---|
821 | result = Eof(); |
---|
822 | } |
---|
823 | |
---|
824 | //DoErrTest(aString); |
---|
825 | |
---|
826 | return result; |
---|
827 | } |
---|
828 | |
---|
829 | /** |
---|
830 | * Consume characters until you did not find the terminal char |
---|
831 | * |
---|
832 | * @update gess 3/25/98 |
---|
833 | * @param aString - receives new data from stream |
---|
834 | * @param allowPunct - If set ignores ':','-','_','.' |
---|
835 | * @return error code |
---|
836 | */ |
---|
837 | nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) { |
---|
838 | |
---|
839 | if (!mSlidingBuffer) { |
---|
840 | return kEOF; |
---|
841 | } |
---|
842 | |
---|
843 | PRUnichar theChar=0; |
---|
844 | nsresult result=Peek(theChar); |
---|
845 | nsScannerIterator origin, current, end; |
---|
846 | PRBool found=PR_FALSE; |
---|
847 | |
---|
848 | origin = mCurrentPosition; |
---|
849 | current = mCurrentPosition; |
---|
850 | end = mEndPosition; |
---|
851 | |
---|
852 | while(current != end) { |
---|
853 | |
---|
854 | theChar=*current; |
---|
855 | found=PR_FALSE; |
---|
856 | switch(theChar) { |
---|
857 | case ':': |
---|
858 | case '_': |
---|
859 | case '-': |
---|
860 | case '.': |
---|
861 | found=allowPunct; |
---|
862 | break; |
---|
863 | default: |
---|
864 | found = ('a'<=theChar && theChar<='z') || |
---|
865 | ('A'<=theChar && theChar<='Z') || |
---|
866 | ('0'<=theChar && theChar<='9'); |
---|
867 | break; |
---|
868 | } |
---|
869 | |
---|
870 | if(!found) { |
---|
871 | AppendUnicodeTo(mCurrentPosition, current, aString); |
---|
872 | break; |
---|
873 | } |
---|
874 | |
---|
875 | ++current; |
---|
876 | } |
---|
877 | |
---|
878 | // Drop NULs on the floor since nobody really likes them |
---|
879 | while (current != end && !*current) { |
---|
880 | ++current; |
---|
881 | } |
---|
882 | |
---|
883 | SetPosition(current); |
---|
884 | if (current == end) { |
---|
885 | AppendUnicodeTo(origin, current, aString); |
---|
886 | return Eof(); |
---|
887 | } |
---|
888 | |
---|
889 | //DoErrTest(aString); |
---|
890 | |
---|
891 | return result; |
---|
892 | } |
---|
893 | |
---|
894 | nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart, |
---|
895 | nsScannerIterator& aEnd, |
---|
896 | PRBool allowPunct) { |
---|
897 | |
---|
898 | if (!mSlidingBuffer) { |
---|
899 | return kEOF; |
---|
900 | } |
---|
901 | |
---|
902 | PRUnichar theChar=0; |
---|
903 | nsresult result=Peek(theChar); |
---|
904 | nsScannerIterator origin, current, end; |
---|
905 | PRBool found=PR_FALSE; |
---|
906 | |
---|
907 | origin = mCurrentPosition; |
---|
908 | current = mCurrentPosition; |
---|
909 | end = mEndPosition; |
---|
910 | |
---|
911 | while(current != end) { |
---|
912 | |
---|
913 | theChar=*current; |
---|
914 | if(theChar) { |
---|
915 | found=PR_FALSE; |
---|
916 | switch(theChar) { |
---|
917 | case ':': |
---|
918 | case '_': |
---|
919 | case '-': |
---|
920 | found=allowPunct; |
---|
921 | break; |
---|
922 | default: |
---|
923 | if(('a'<=theChar) && (theChar<='z')) |
---|
924 | found=PR_TRUE; |
---|
925 | else if(('A'<=theChar) && (theChar<='Z')) |
---|
926 | found=PR_TRUE; |
---|
927 | else if(('0'<=theChar) && (theChar<='9')) |
---|
928 | found=PR_TRUE; |
---|
929 | break; |
---|
930 | } |
---|
931 | |
---|
932 | if(!found) { |
---|
933 | aStart = mCurrentPosition; |
---|
934 | aEnd = current; |
---|
935 | break; |
---|
936 | } |
---|
937 | } |
---|
938 | ++current; |
---|
939 | } |
---|
940 | |
---|
941 | SetPosition(current); |
---|
942 | if (current == end) { |
---|
943 | aStart = origin; |
---|
944 | aEnd = current; |
---|
945 | return Eof(); |
---|
946 | } |
---|
947 | |
---|
948 | //DoErrTest(aString); |
---|
949 | |
---|
950 | return result; |
---|
951 | } |
---|
952 | |
---|
953 | /** |
---|
954 | * Consume digits |
---|
955 | * |
---|
956 | * @param aString - should contain digits |
---|
957 | * @return error code |
---|
958 | */ |
---|
959 | nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) { |
---|
960 | |
---|
961 | if (!mSlidingBuffer) { |
---|
962 | return kEOF; |
---|
963 | } |
---|
964 | |
---|
965 | NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); |
---|
966 | |
---|
967 | PRUnichar theChar=0; |
---|
968 | nsresult result=Peek(theChar); |
---|
969 | nsScannerIterator origin, current, end; |
---|
970 | |
---|
971 | origin = mCurrentPosition; |
---|
972 | current = origin; |
---|
973 | end = mEndPosition; |
---|
974 | |
---|
975 | PRBool done = PR_FALSE; |
---|
976 | while(current != end) { |
---|
977 | theChar=*current; |
---|
978 | if(theChar) { |
---|
979 | done = (theChar < '0' || theChar > '9') && |
---|
980 | ((aBase == 16)? (theChar < 'A' || theChar > 'F') && |
---|
981 | (theChar < 'a' || theChar > 'f') |
---|
982 | :PR_TRUE); |
---|
983 | if(done) { |
---|
984 | AppendUnicodeTo(origin, current, aString); |
---|
985 | break; |
---|
986 | } |
---|
987 | } |
---|
988 | ++current; |
---|
989 | } |
---|
990 | |
---|
991 | SetPosition(current); |
---|
992 | if (current == end) { |
---|
993 | AppendUnicodeTo(origin, current, aString); |
---|
994 | return Eof(); |
---|
995 | } |
---|
996 | |
---|
997 | //DoErrTest(aString); |
---|
998 | |
---|
999 | return result; |
---|
1000 | } |
---|
1001 | |
---|
1002 | /** |
---|
1003 | * Consume characters until you find the terminal char |
---|
1004 | * |
---|
1005 | * @update gess 3/25/98 |
---|
1006 | * @param aString receives new data from stream |
---|
1007 | * @param addTerminal tells us whether to append terminal to aString |
---|
1008 | * @return error code |
---|
1009 | */ |
---|
1010 | nsresult nsScanner::ReadWhitespace(nsString& aString, |
---|
1011 | PRInt32& aNewlinesSkipped) { |
---|
1012 | |
---|
1013 | if (!mSlidingBuffer) { |
---|
1014 | return kEOF; |
---|
1015 | } |
---|
1016 | |
---|
1017 | PRUnichar theChar = 0; |
---|
1018 | nsresult result = Peek(theChar); |
---|
1019 | |
---|
1020 | if (result == kEOF) { |
---|
1021 | return Eof(); |
---|
1022 | } |
---|
1023 | |
---|
1024 | nsScannerIterator origin, current, end; |
---|
1025 | PRBool done = PR_FALSE; |
---|
1026 | |
---|
1027 | origin = mCurrentPosition; |
---|
1028 | current = origin; |
---|
1029 | end = mEndPosition; |
---|
1030 | |
---|
1031 | while(!done && current != end) { |
---|
1032 | switch(theChar) { |
---|
1033 | case '\n': |
---|
1034 | case '\r': ++aNewlinesSkipped; |
---|
1035 | case ' ' : |
---|
1036 | case '\b': |
---|
1037 | case '\t': |
---|
1038 | { |
---|
1039 | PRUnichar thePrevChar = theChar; |
---|
1040 | theChar = (++current != end) ? *current : '\0'; |
---|
1041 | if ((thePrevChar == '\r' && theChar == '\n') || |
---|
1042 | (thePrevChar == '\n' && theChar == '\r')) { |
---|
1043 | theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF |
---|
1044 | } |
---|
1045 | } |
---|
1046 | break; |
---|
1047 | default: |
---|
1048 | done = PR_TRUE; |
---|
1049 | AppendUnicodeTo(origin, current, aString); |
---|
1050 | break; |
---|
1051 | } |
---|
1052 | } |
---|
1053 | |
---|
1054 | SetPosition(current); |
---|
1055 | if (current == end) { |
---|
1056 | AppendUnicodeTo(origin, current, aString); |
---|
1057 | result = Eof(); |
---|
1058 | } |
---|
1059 | |
---|
1060 | return result; |
---|
1061 | } |
---|
1062 | |
---|
1063 | nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, |
---|
1064 | nsScannerIterator& aEnd, |
---|
1065 | PRInt32& aNewlinesSkipped) { |
---|
1066 | |
---|
1067 | if (!mSlidingBuffer) { |
---|
1068 | return kEOF; |
---|
1069 | } |
---|
1070 | |
---|
1071 | PRUnichar theChar = 0; |
---|
1072 | nsresult result = Peek(theChar); |
---|
1073 | |
---|
1074 | if (result == kEOF) { |
---|
1075 | return Eof(); |
---|
1076 | } |
---|
1077 | |
---|
1078 | nsScannerIterator origin, current, end; |
---|
1079 | PRBool done = PR_FALSE; |
---|
1080 | |
---|
1081 | origin = mCurrentPosition; |
---|
1082 | current = origin; |
---|
1083 | end = mEndPosition; |
---|
1084 | |
---|
1085 | while(!done && current != end) { |
---|
1086 | switch(theChar) { |
---|
1087 | case '\n': |
---|
1088 | case '\r': ++aNewlinesSkipped; |
---|
1089 | case ' ' : |
---|
1090 | case '\b': |
---|
1091 | case '\t': |
---|
1092 | { |
---|
1093 | PRUnichar thePrevChar = theChar; |
---|
1094 | theChar = (++current != end) ? *current : '\0'; |
---|
1095 | if ((thePrevChar == '\r' && theChar == '\n') || |
---|
1096 | (thePrevChar == '\n' && theChar == '\r')) { |
---|
1097 | theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF |
---|
1098 | } |
---|
1099 | } |
---|
1100 | break; |
---|
1101 | default: |
---|
1102 | done = PR_TRUE; |
---|
1103 | aStart = origin; |
---|
1104 | aEnd = current; |
---|
1105 | break; |
---|
1106 | } |
---|
1107 | } |
---|
1108 | |
---|
1109 | SetPosition(current); |
---|
1110 | if (current == end) { |
---|
1111 | aStart = origin; |
---|
1112 | aEnd = current; |
---|
1113 | result = Eof(); |
---|
1114 | } |
---|
1115 | |
---|
1116 | return result; |
---|
1117 | } |
---|
1118 | |
---|
1119 | /** |
---|
1120 | * Consume chars as long as they are <i>in</i> the |
---|
1121 | * given validSet of input chars. |
---|
1122 | * |
---|
1123 | * @update gess 3/25/98 |
---|
1124 | * @param aString will contain the result of this method |
---|
1125 | * @param aValidSet is an ordered string that contains the |
---|
1126 | * valid characters |
---|
1127 | * @return error code |
---|
1128 | */ |
---|
1129 | nsresult nsScanner::ReadWhile(nsString& aString, |
---|
1130 | nsString& aValidSet, |
---|
1131 | PRBool addTerminal){ |
---|
1132 | |
---|
1133 | if (!mSlidingBuffer) { |
---|
1134 | return kEOF; |
---|
1135 | } |
---|
1136 | |
---|
1137 | PRUnichar theChar=0; |
---|
1138 | nsresult result=Peek(theChar); |
---|
1139 | nsScannerIterator origin, current, end; |
---|
1140 | |
---|
1141 | origin = mCurrentPosition; |
---|
1142 | current = origin; |
---|
1143 | end = mEndPosition; |
---|
1144 | |
---|
1145 | while(current != end) { |
---|
1146 | |
---|
1147 | theChar=*current; |
---|
1148 | if(theChar) { |
---|
1149 | PRInt32 pos=aValidSet.FindChar(theChar); |
---|
1150 | if(kNotFound==pos) { |
---|
1151 | if(addTerminal) |
---|
1152 | ++current; |
---|
1153 | AppendUnicodeTo(origin, current, aString); |
---|
1154 | break; |
---|
1155 | } |
---|
1156 | } |
---|
1157 | ++current; |
---|
1158 | } |
---|
1159 | |
---|
1160 | SetPosition(current); |
---|
1161 | if (current == end) { |
---|
1162 | AppendUnicodeTo(origin, current, aString); |
---|
1163 | return Eof(); |
---|
1164 | } |
---|
1165 | |
---|
1166 | //DoErrTest(aString); |
---|
1167 | |
---|
1168 | return result; |
---|
1169 | |
---|
1170 | } |
---|
1171 | |
---|
1172 | /** |
---|
1173 | * Consume characters until you encounter one contained in given |
---|
1174 | * input set. |
---|
1175 | * |
---|
1176 | * @update gess 3/25/98 |
---|
1177 | * @param aString will contain the result of this method |
---|
1178 | * @param aTerminalSet is an ordered string that contains |
---|
1179 | * the set of INVALID characters |
---|
1180 | * @return error code |
---|
1181 | */ |
---|
1182 | nsresult nsScanner::ReadUntil(nsAString& aString, |
---|
1183 | const nsReadEndCondition& aEndCondition, |
---|
1184 | PRBool addTerminal) |
---|
1185 | { |
---|
1186 | if (!mSlidingBuffer) { |
---|
1187 | return kEOF; |
---|
1188 | } |
---|
1189 | |
---|
1190 | nsScannerIterator origin, current; |
---|
1191 | const PRUnichar* setstart = aEndCondition.mChars; |
---|
1192 | const PRUnichar* setcurrent; |
---|
1193 | |
---|
1194 | origin = mCurrentPosition; |
---|
1195 | current = origin; |
---|
1196 | |
---|
1197 | PRUnichar theChar=0; |
---|
1198 | nsresult result=Peek(theChar); |
---|
1199 | |
---|
1200 | if (result == kEOF) { |
---|
1201 | return Eof(); |
---|
1202 | } |
---|
1203 | |
---|
1204 | while (current != mEndPosition) { |
---|
1205 | // Filter out completely wrong characters |
---|
1206 | // Check if all bits are in the required area |
---|
1207 | if(!(theChar & aEndCondition.mFilter)) { |
---|
1208 | // They were. Do a thorough check. |
---|
1209 | |
---|
1210 | setcurrent = setstart; |
---|
1211 | while (*setcurrent) { |
---|
1212 | if (*setcurrent == theChar) { |
---|
1213 | goto found; |
---|
1214 | } |
---|
1215 | ++setcurrent; |
---|
1216 | } |
---|
1217 | } |
---|
1218 | |
---|
1219 | ++current; |
---|
1220 | theChar = *current; |
---|
1221 | } |
---|
1222 | |
---|
1223 | // If we are here, we didn't find any terminator in the string and |
---|
1224 | // current = mEndPosition |
---|
1225 | SetPosition(current); |
---|
1226 | AppendUnicodeTo(origin, current, aString); |
---|
1227 | return Eof(); |
---|
1228 | |
---|
1229 | found: |
---|
1230 | if(addTerminal) |
---|
1231 | ++current; |
---|
1232 | AppendUnicodeTo(origin, current, aString); |
---|
1233 | SetPosition(current); |
---|
1234 | |
---|
1235 | //DoErrTest(aString); |
---|
1236 | |
---|
1237 | return NS_OK; |
---|
1238 | } |
---|
1239 | |
---|
1240 | nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, |
---|
1241 | nsScannerIterator& aEnd, |
---|
1242 | const nsReadEndCondition &aEndCondition, |
---|
1243 | PRBool addTerminal) |
---|
1244 | { |
---|
1245 | if (!mSlidingBuffer) { |
---|
1246 | return kEOF; |
---|
1247 | } |
---|
1248 | |
---|
1249 | nsScannerIterator origin, current; |
---|
1250 | const PRUnichar* setstart = aEndCondition.mChars; |
---|
1251 | const PRUnichar* setcurrent; |
---|
1252 | |
---|
1253 | origin = mCurrentPosition; |
---|
1254 | current = origin; |
---|
1255 | |
---|
1256 | PRUnichar theChar=0; |
---|
1257 | nsresult result=Peek(theChar); |
---|
1258 | |
---|
1259 | if (result == kEOF) { |
---|
1260 | aStart = aEnd = current; |
---|
1261 | return Eof(); |
---|
1262 | } |
---|
1263 | |
---|
1264 | while (current != mEndPosition) { |
---|
1265 | // Filter out completely wrong characters |
---|
1266 | // Check if all bits are in the required area |
---|
1267 | if(!(theChar & aEndCondition.mFilter)) { |
---|
1268 | // They were. Do a thorough check. |
---|
1269 | setcurrent = setstart; |
---|
1270 | while (*setcurrent) { |
---|
1271 | if (*setcurrent == theChar) { |
---|
1272 | goto found; |
---|
1273 | } |
---|
1274 | ++setcurrent; |
---|
1275 | } |
---|
1276 | } |
---|
1277 | |
---|
1278 | ++current; |
---|
1279 | theChar = *current; |
---|
1280 | } |
---|
1281 | |
---|
1282 | // If we are here, we didn't find any terminator in the string and |
---|
1283 | // current = mEndPosition |
---|
1284 | SetPosition(current); |
---|
1285 | aStart = origin; |
---|
1286 | aEnd = current; |
---|
1287 | return Eof(); |
---|
1288 | |
---|
1289 | found: |
---|
1290 | if(addTerminal) |
---|
1291 | ++current; |
---|
1292 | aStart = origin; |
---|
1293 | aEnd = current; |
---|
1294 | SetPosition(current); |
---|
1295 | |
---|
1296 | return NS_OK; |
---|
1297 | } |
---|
1298 | |
---|
1299 | /** |
---|
1300 | * Consumes chars until you see the given terminalChar |
---|
1301 | * |
---|
1302 | * @update gess 3/25/98 |
---|
1303 | * @param |
---|
1304 | * @return error code |
---|
1305 | */ |
---|
1306 | nsresult nsScanner::ReadUntil(nsAString& aString, |
---|
1307 | PRUnichar aTerminalChar, |
---|
1308 | PRBool addTerminal) |
---|
1309 | { |
---|
1310 | if (!mSlidingBuffer) { |
---|
1311 | return kEOF; |
---|
1312 | } |
---|
1313 | |
---|
1314 | nsScannerIterator origin, current; |
---|
1315 | |
---|
1316 | origin = mCurrentPosition; |
---|
1317 | current = origin; |
---|
1318 | |
---|
1319 | PRUnichar theChar; |
---|
1320 | Peek(theChar); |
---|
1321 | |
---|
1322 | while (current != mEndPosition) { |
---|
1323 | if (aTerminalChar == theChar) { |
---|
1324 | if(addTerminal) |
---|
1325 | ++current; |
---|
1326 | AppendUnicodeTo(origin, current, aString); |
---|
1327 | SetPosition(current); |
---|
1328 | return NS_OK; |
---|
1329 | } |
---|
1330 | ++current; |
---|
1331 | theChar = *current; |
---|
1332 | } |
---|
1333 | |
---|
1334 | // If we are here, we didn't find any terminator in the string and |
---|
1335 | // current = mEndPosition |
---|
1336 | AppendUnicodeTo(origin, current, aString); |
---|
1337 | SetPosition(current); |
---|
1338 | return Eof(); |
---|
1339 | |
---|
1340 | } |
---|
1341 | |
---|
1342 | void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd) |
---|
1343 | { |
---|
1344 | aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd); |
---|
1345 | } |
---|
1346 | |
---|
1347 | void nsScanner::CurrentPosition(nsScannerIterator& aPosition) |
---|
1348 | { |
---|
1349 | aPosition = mCurrentPosition; |
---|
1350 | } |
---|
1351 | |
---|
1352 | void nsScanner::EndReading(nsScannerIterator& aPosition) |
---|
1353 | { |
---|
1354 | aPosition = mEndPosition; |
---|
1355 | } |
---|
1356 | |
---|
1357 | void nsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate, PRBool aReverse) |
---|
1358 | { |
---|
1359 | if (mSlidingBuffer) { |
---|
1360 | if (aReverse) { |
---|
1361 | mCountRemaining += (Distance(aPosition, mCurrentPosition)); |
---|
1362 | } |
---|
1363 | else { |
---|
1364 | mCountRemaining -= (Distance(mCurrentPosition, aPosition)); |
---|
1365 | } |
---|
1366 | mCurrentPosition = aPosition; |
---|
1367 | if (aTerminate && (mCurrentPosition == mEndPosition)) { |
---|
1368 | mMarkPosition = mCurrentPosition; |
---|
1369 | mSlidingBuffer->DiscardPrefix(mCurrentPosition); |
---|
1370 | } |
---|
1371 | } |
---|
1372 | } |
---|
1373 | |
---|
1374 | void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition, |
---|
1375 | PRUnichar aChar) |
---|
1376 | { |
---|
1377 | if (mSlidingBuffer) { |
---|
1378 | mSlidingBuffer->ReplaceCharacter(aPosition, aChar); |
---|
1379 | } |
---|
1380 | } |
---|
1381 | |
---|
1382 | void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf, |
---|
1383 | nsIRequest *aRequest) |
---|
1384 | { |
---|
1385 | if (nsParser::sParserDataListeners && mParser && |
---|
1386 | NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(), |
---|
1387 | aBuf->DataEnd()), aRequest))) { |
---|
1388 | // Don't actually append on failure. |
---|
1389 | |
---|
1390 | return; |
---|
1391 | } |
---|
1392 | |
---|
1393 | if (!mSlidingBuffer) { |
---|
1394 | mSlidingBuffer = new nsScannerString(aBuf); |
---|
1395 | mSlidingBuffer->BeginReading(mCurrentPosition); |
---|
1396 | mMarkPosition = mCurrentPosition; |
---|
1397 | mSlidingBuffer->EndReading(mEndPosition); |
---|
1398 | mCountRemaining = aBuf->DataLength(); |
---|
1399 | } |
---|
1400 | else { |
---|
1401 | mSlidingBuffer->AppendBuffer(aBuf); |
---|
1402 | if (mCurrentPosition == mEndPosition) { |
---|
1403 | mSlidingBuffer->BeginReading(mCurrentPosition); |
---|
1404 | } |
---|
1405 | mSlidingBuffer->EndReading(mEndPosition); |
---|
1406 | mCountRemaining += aBuf->DataLength(); |
---|
1407 | } |
---|
1408 | |
---|
1409 | if (mFirstNonWhitespacePosition == -1) { |
---|
1410 | nsScannerIterator iter(mCurrentPosition); |
---|
1411 | nsScannerIterator end(mEndPosition); |
---|
1412 | |
---|
1413 | while (iter != end) { |
---|
1414 | if (!nsCRT::IsAsciiSpace(*iter)) { |
---|
1415 | mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter); |
---|
1416 | |
---|
1417 | break; |
---|
1418 | } |
---|
1419 | |
---|
1420 | ++iter; |
---|
1421 | } |
---|
1422 | } |
---|
1423 | } |
---|
1424 | |
---|
1425 | void nsScanner::AppendASCIItoBuffer(const char* aData, PRUint32 aLen, |
---|
1426 | nsIRequest *aRequest) |
---|
1427 | { |
---|
1428 | nsScannerString::Buffer* buf = nsScannerString::AllocBuffer(aLen); |
---|
1429 | if (buf) |
---|
1430 | { |
---|
1431 | LossyConvertEncoding<char, PRUnichar> converter(buf->DataStart()); |
---|
1432 | converter.write(aData, aLen); |
---|
1433 | converter.write_terminator(); |
---|
1434 | AppendToBuffer(buf, aRequest); |
---|
1435 | } |
---|
1436 | } |
---|
1437 | |
---|
1438 | /** |
---|
1439 | * call this to copy bytes out of the scanner that have not yet been consumed |
---|
1440 | * by the tokenization process. |
---|
1441 | * |
---|
1442 | * @update gess 5/12/98 |
---|
1443 | * @param aCopyBuffer is where the scanner buffer will be copied to |
---|
1444 | * @return nada |
---|
1445 | */ |
---|
1446 | void nsScanner::CopyUnusedData(nsString& aCopyBuffer) { |
---|
1447 | nsScannerIterator start, end; |
---|
1448 | start = mCurrentPosition; |
---|
1449 | end = mEndPosition; |
---|
1450 | |
---|
1451 | CopyUnicodeTo(start, end, aCopyBuffer); |
---|
1452 | } |
---|
1453 | |
---|
1454 | /** |
---|
1455 | * Retrieve the name of the file that the scanner is reading from. |
---|
1456 | * In some cases, it's just a given name, because the scanner isn't |
---|
1457 | * really reading from a file. |
---|
1458 | * |
---|
1459 | * @update gess 5/12/98 |
---|
1460 | * @return |
---|
1461 | */ |
---|
1462 | nsString& nsScanner::GetFilename(void) { |
---|
1463 | return mFilename; |
---|
1464 | } |
---|
1465 | |
---|
1466 | /** |
---|
1467 | * Conduct self test. Actually, selftesting for this class |
---|
1468 | * occurs in the parser selftest. |
---|
1469 | * |
---|
1470 | * @update gess 3/25/98 |
---|
1471 | * @param |
---|
1472 | * @return |
---|
1473 | */ |
---|
1474 | |
---|
1475 | void nsScanner::SelfTest(void) { |
---|
1476 | #ifdef _DEBUG |
---|
1477 | #endif |
---|
1478 | } |
---|
1479 | |
---|
1480 | |
---|
1481 | |
---|