source: trunk/third/mozilla/htmlparser/src/nsScanner.cpp @ 21250

Revision 21250, 34.5 KB checked in by rbasch, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21249, which included commits to RCS files with non-trunk default branches.
Line 
1/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* ***** BEGIN LICENSE BLOCK *****
3 * Version: NPL 1.1/GPL 2.0/LGPL 2.1
4 *
5 * The contents of this file are subject to the Netscape Public License
6 * Version 1.1 (the "License"); you may not use this file except in
7 * compliance with the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/NPL/
9 *
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
14 *
15 * The Original Code is mozilla.org code.
16 *
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
21 *
22 * Contributor(s):
23 *
24 *
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the NPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the NPL, the GPL or the LGPL.
36 *
37 * ***** END LICENSE BLOCK ***** */
38
39//#define __INCREMENTAL 1
40
41#include "nsScanner.h"
42#include "nsDebug.h"
43#include "nsIServiceManager.h"
44#include "nsICharsetConverterManager.h"
45#include "nsICharsetAlias.h"
46#include "nsReadableUtils.h"
47#include "nsIInputStream.h"
48#include "nsILocalFile.h"
49#include "nsNetUtil.h"
50#include "nsUTF8Utils.h" // for LossyConvertEncoding
51#include "nsCRT.h"
52#include "nsParser.h"
53
54static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
55
56nsReadEndCondition::nsReadEndCondition(const PRUnichar* aTerminateChars) :
57  mChars(aTerminateChars), mFilter(PRUnichar(~0)) // All bits set
58{
59  // Build filter that will be used to filter out characters with
60  // bits that none of the terminal chars have. This works very well
61  // because terminal chars often have only the last 4-6 bits set and
62  // normal ascii letters have bit 7 set. Other letters have even higher
63  // bits set.
64 
65  // Calculate filter
66  const PRUnichar *current = aTerminateChars;
67  PRUnichar terminalChar = *current;
68  while (terminalChar) {
69    mFilter &= ~terminalChar;
70    ++current;
71    terminalChar = *current;
72  }
73}
74
75static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
76
// HTML snippet announcing that a non-existent document was requested.
static const char kBadHTMLText[] =
  "<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";

// Diagnostic text for string arguments that are expected to be ordered.
static const char kUnorderedStringError[] =
  "String argument must be ordered. Don't you read API's?";

// Number of bytes FillBuffer() asks the input stream for per read.
// Building with __INCREMENTAL defined drops this to a single byte.
#ifdef __INCREMENTAL
const int kBufsize = 1;
#else
const int kBufsize = 64;
#endif
85
86MOZ_DECL_CTOR_COUNTER(nsScanner)
87
88/**
89 *  Use this constructor if you want i/o to be based on
90 *  a single string you hand in during construction.
91 *  This short cut was added for Javascript.
92 *
93 *  @update  gess 5/12/98
94 *  @param   aMode represents the parser mode (nav, other)
95 *  @return 
96 */
97nsScanner::nsScanner(const nsAString& anHTMLString, const nsACString& aCharset,
98                     PRInt32 aSource)
99  : mParser(nsnull)
100{
101  MOZ_COUNT_CTOR(nsScanner);
102
103  mTotalRead = anHTMLString.Length();
104  mSlidingBuffer = nsnull;
105  mCountRemaining = 0;
106  mFirstNonWhitespacePosition = -1;
107  AppendToBuffer(anHTMLString);
108  mSlidingBuffer->BeginReading(mCurrentPosition);
109  mMarkPosition = mCurrentPosition;
110  mIncremental = PR_FALSE;
111  mUnicodeDecoder = 0;
112  mCharsetSource = kCharsetUninitialized;
113  SetDocumentCharset(aCharset, aSource);
114}
115
116/**
117 *  Use this constructor if you want i/o to be based on strings
118 *  the scanner receives. If you pass a null filename, you
119 *  can still provide data to the scanner via append.
120 *
121 *  @update  gess 5/12/98
122 *  @param   aFilename --
123 *  @return 
124 */
125nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream,
126                     const nsACString& aCharset, PRInt32 aSource)
127  : mFilename(aFilename), mParser(nsnull)
128{
129  MOZ_COUNT_CTOR(nsScanner);
130
131  mSlidingBuffer = nsnull;
132
133  // XXX This is a big hack.  We need to initialize the iterators to something.
134  // What matters is that mCurrentPosition == mEndPosition, so that our methods
135  // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
136  // so that we have some hope of catching null pointer dereferences associated
137  // with this hack. --darin
138  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
139  mMarkPosition = mCurrentPosition;
140  mEndPosition = mCurrentPosition;
141
142  mIncremental = PR_TRUE;
143  mFirstNonWhitespacePosition = -1;
144  mCountRemaining = 0;
145  mTotalRead=0;
146
147  if(aCreateStream) {
148    nsCOMPtr<nsILocalFile> file;
149    nsCOMPtr<nsIInputStream> fileStream;
150   
151    NS_NewLocalFile(aFilename, PR_TRUE, getter_AddRefs(file));
152    if (file)
153      NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), file);
154
155  } //if
156  mUnicodeDecoder = 0;
157  mCharsetSource = kCharsetUninitialized;
158  SetDocumentCharset(aCharset, aSource);
159}
160
161/**
162 *  Use this constructor if you want i/o to be stream based.
163 *
164 *  @update  gess 5/12/98
165 *  @param   aStream --
166 *  @param   assumeOwnership --
167 *  @param   aFilename --
168 *  @return 
169 */
170nsScanner::nsScanner(const nsAString& aFilename, nsIInputStream* aStream,
171                     const nsACString& aCharset, PRInt32 aSource)
172  : mFilename(aFilename), mParser(nsnull)
173
174  MOZ_COUNT_CTOR(nsScanner);
175
176  mSlidingBuffer = nsnull;
177
178  // XXX This is a big hack.  We need to initialize the iterators to something.
179  // What matters is that mCurrentPosition == mEndPosition, so that our methods
180  // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
181  // so that we have some hope of catching null pointer dereferences associated
182  // with this hack. --darin
183  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
184  mMarkPosition = mCurrentPosition;
185  mEndPosition = mCurrentPosition;
186
187  mIncremental = PR_FALSE;
188  mFirstNonWhitespacePosition = -1;
189  mCountRemaining = 0;
190  mTotalRead=0;
191  mInputStream=aStream;
192  mUnicodeDecoder = 0;
193  mCharsetSource = kCharsetUninitialized;
194  SetDocumentCharset(aCharset, aSource);
195}
196
197
198nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , PRInt32 aSource) {
199
200  nsresult res = NS_OK;
201
202  if( aSource < mCharsetSource) // priority is lower the the current one , just
203    return res;
204
205  nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res));
206  NS_ASSERTION( nsnull != calias, "cannot find charset alias");
207  if( NS_SUCCEEDED(res) && (nsnull != calias))
208  {
209    PRBool same = PR_FALSE;
210    res = calias->Equals(aCharset, mCharset, &same);
211    if(NS_SUCCEEDED(res) && same)
212    {
213      return NS_OK; // no difference, don't change it
214    }
215    // different, need to change it
216    nsCAutoString charsetName;
217    res = calias->GetPreferred(aCharset, charsetName);
218
219    if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
220    {
221       // failed - unknown alias , fallback to ISO-8859-1
222      charsetName.Assign(NS_LITERAL_CSTRING("ISO-8859-1"));
223    }
224    mCharset = charsetName;
225    mCharsetSource = aSource;
226
227    nsCOMPtr<nsICharsetConverterManager> ccm =
228             do_GetService(kCharsetConverterManagerCID, &res);
229    if(NS_SUCCEEDED(res) && (nsnull != ccm))
230    {
231      nsIUnicodeDecoder * decoder = nsnull;
232      res = ccm->GetUnicodeDecoderRaw(mCharset.get(), &decoder);
233      if(NS_SUCCEEDED(res) && (nsnull != decoder))
234      {
235         NS_IF_RELEASE(mUnicodeDecoder);
236
237         mUnicodeDecoder = decoder;
238      }   
239    }
240  }
241  return res;
242}
243
244
245/**
246 *  default destructor
247 * 
248 *  @update  gess 3/25/98
249 *  @param   
250 *  @return 
251 */
252nsScanner::~nsScanner() {
253
254  if (mSlidingBuffer) {
255    delete mSlidingBuffer;
256  }
257
258  MOZ_COUNT_DTOR(nsScanner);
259
260  if(mInputStream) {
261    mInputStream->Close();
262    mInputStream = 0;
263  }
264
265  NS_IF_RELEASE(mUnicodeDecoder);
266}
267
268/**
269 *  Resets current offset position of input stream to marked position.
270 *  This allows us to back up to this point if the need should arise,
271 *  such as when tokenization gets interrupted.
272 *  NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
273 *
274 *  @update  gess 5/12/98
275 *  @param   
276 *  @return 
277 */
278void nsScanner::RewindToMark(void){
279  mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));
280  mCurrentPosition = mMarkPosition;
281}
282
283
284/**
285 *  Records current offset position in input stream. This allows us
286 *  to back up to this point if the need should arise, such as when
287 *  tokenization gets interrupted.
288 *
289 *  @update  gess 7/29/98
290 *  @param   
291 *  @return 
292 */
293void nsScanner::Mark() {
294  if (mSlidingBuffer) {
295    mSlidingBuffer->DiscardPrefix(mCurrentPosition);
296    mSlidingBuffer->BeginReading(mCurrentPosition);
297    mMarkPosition = mCurrentPosition;
298  }
299}
300 
301
302/**
303 * Insert data to our underlying input buffer as
304 * if it were read from an input stream.
305 *
306 * @update  harishd 01/12/99
307 * @return  error code
308 */
309PRBool nsScanner::UngetReadable(const nsAString& aBuffer) {
310
311  mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
312  mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
313  mSlidingBuffer->EndReading(mEndPosition);
314 
315  PRUint32 length = aBuffer.Length();
316  mCountRemaining += length; // Ref. bug 117441
317  mTotalRead += length;
318  return PR_TRUE;
319}
320
321/**
322 * Append data to our underlying input buffer as
323 * if it were read from an input stream.
324 *
325 * @update  gess4/3/98
326 * @return  error code
327 */
328nsresult nsScanner::Append(const nsAString& aBuffer) {
329 
330  mTotalRead += aBuffer.Length();
331  AppendToBuffer(aBuffer);
332  return NS_OK;
333}
334
335/**
336 * 
337 * 
338 *  @update  gess 5/21/98
339 *  @param   
340 *  @return 
341 */
342nsresult nsScanner::Append(const char* aBuffer, PRUint32 aLen,
343                           nsIRequest *aRequest)
344{
345  nsresult res=NS_OK;
346  PRUnichar *unichars, *start;
347  if(mUnicodeDecoder) {
348    PRInt32 unicharBufLen = 0;
349    mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
350    nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
351    NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
352    start = unichars = buffer->DataStart();
353         
354    PRInt32 totalChars = 0;
355    PRInt32 unicharLength = unicharBufLen;
356    do {
357      PRInt32 srcLength = aLen;
358                  res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
359
360      totalChars += unicharLength;
361      // Continuation of failure case
362                  if(NS_FAILED(res)) {
363        // if we failed, we consume one byte, replace it with U+FFFD
364        // and try the conversion again.
365
366        // This is only needed because some decoders don't follow the
367        // nsIUnicodeDecoder contract: they return a failure when *aDestLength
368        // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT.  See bug 244177
369        if ((unichars + unicharLength) >= buffer->DataEnd()) {
370          NS_ERROR("Unexpected end of destination buffer");
371          break;
372        }
373
374        unichars[unicharLength++] = (PRUnichar)0xFFFD;
375        unichars = unichars + unicharLength;
376        unicharLength = unicharBufLen - (++totalChars);
377
378                          mUnicodeDecoder->Reset();
379
380        if(((PRUint32) (srcLength + 1)) > aLen) {
381          srcLength = aLen;
382        }
383        else {
384          ++srcLength;
385        }
386
387        aBuffer += srcLength;
388        aLen -= srcLength;
389                  }
390          } while (NS_FAILED(res) && (aLen > 0));
391
392    buffer->SetDataLength(totalChars);
393    AppendToBuffer(buffer, aRequest);
394    mTotalRead += totalChars;
395
396    // Don't propagate return code of unicode decoder
397    // since it doesn't reflect on our success or failure
398    // - Ref. bug 87110
399    res = NS_OK;
400  }
401  else {
402    AppendASCIItoBuffer(aBuffer, aLen, aRequest);
403    mTotalRead+=aLen;
404  }
405
406  return res;
407}
408
409
410/**
411 * Grab data from underlying stream.
412 *
413 * @update  gess4/3/98
414 * @return  error code
415 */
416nsresult nsScanner::FillBuffer(void) {
417  nsresult result=NS_OK;
418
419  if(!mInputStream) {
420#if 0
421    //This is DEBUG code!!!!!!  XXX DEBUG XXX
422    //If you're here, it means someone tried to load a
423    //non-existent document. So as a favor, we emit a
424    //little bit of HTML explaining the error.
425    if(0==mTotalRead) {
426      mBuffer.Append((const char*)kBadHTMLText);
427      mBuffer.Append(mFilename);
428      mTotalRead+=mBuffer.Length();
429    }
430    else
431#endif
432    result=kEOF;
433  }
434  else {
435    PRUint32 numread=0;
436    char buf[kBufsize+1];
437    buf[kBufsize]=0;
438
439    // XXX use ReadSegments to avoid extra buffer copy? --darin
440
441    result = mInputStream->Read(buf, kBufsize, &numread);
442    if (0 == numread) {
443      return kEOF;
444    }
445
446    if((0<numread) && (0==result)) {
447      AppendASCIItoBuffer(buf, numread, nsnull);
448    }
449    mTotalRead+=numread;
450  }
451
452  return result;
453}
454
455/**
456 *  determine if the scanner has reached EOF
457 * 
458 *  @update  gess 5/12/98
459 *  @param   
460 *  @return  0=!eof 1=eof
461 */
462nsresult nsScanner::Eof() {
463  nsresult theError=NS_OK;
464 
465  if (!mSlidingBuffer) {
466    return kEOF;
467  }
468
469  theError=FillBuffer(); 
470
471  if(NS_OK==theError) {
472    if (0==(PRUint32)mSlidingBuffer->Length()) {
473      return kEOF;
474    }
475  }
476
477  return theError;
478}
479
480/**
481 *  retrieve next char from scanners internal input stream
482 * 
483 *  @update  gess 3/25/98
484 *  @param   
485 *  @return  error code reflecting read status
486 */
487nsresult nsScanner::GetChar(PRUnichar& aChar) {
488  nsresult result=NS_OK;
489  aChar=0; 
490
491  if (!mSlidingBuffer) {
492    return kEOF;
493  }
494
495  if (mCurrentPosition == mEndPosition) {
496    result=Eof();
497  }
498
499  if(NS_OK == result){
500    aChar=*mCurrentPosition++;
501    --mCountRemaining;
502  }
503  return result;
504}
505
506
507/**
508 *  peek ahead to consume next char from scanner's internal
509 *  input buffer
510 * 
511 *  @update  gess 3/25/98
512 *  @param   
513 *  @return 
514 */
515nsresult nsScanner::Peek(PRUnichar& aChar, PRUint32 aOffset) {
516  nsresult result=NS_OK;
517  aChar=0; 
518 
519  if (!mSlidingBuffer) {
520    return kEOF;
521  }
522
523  if (mCurrentPosition == mEndPosition) {
524    result=Eof();
525  }
526
527  if(NS_OK == result){
528    if (aOffset) {
529      while ((NS_OK == result) && (mCountRemaining <= aOffset)) {
530        result = Eof();
531      }
532
533      if (NS_OK == result) {
534        nsScannerIterator pos = mCurrentPosition;
535        pos.advance(aOffset);
536        aChar=*pos;
537      }
538    }
539    else {
540      aChar=*mCurrentPosition;
541    }
542  }
543
544  return result;
545}
546
547nsresult nsScanner::Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset)
548{
549  if (!mSlidingBuffer) {
550    return kEOF;
551  }
552
553  if (mCurrentPosition == mEndPosition) {
554    return Eof();
555  }   
556 
557  nsScannerIterator start, end;
558
559  start = mCurrentPosition;
560
561  if (mCountRemaining <= aOffset) {
562    return kEOF;
563  }
564
565  if (aOffset > 0) {
566    start.advance(aOffset);
567  }
568
569  if (mCountRemaining < PRUint32(aNumChars + aOffset)) {
570    end = mEndPosition;
571  }
572  else {
573    end = start;
574    end.advance(aNumChars);
575  }
576
577  CopyUnicodeTo(start, end, aStr);
578
579  return NS_OK;
580}
581
582
583/**
584 *  Skip whitespace on scanner input stream
585 * 
586 *  @update  gess 3/25/98
587 *  @param   
588 *  @return  error status
589 */
590nsresult nsScanner::SkipWhitespace(PRInt32& aNewlinesSkipped) {
591
592  if (!mSlidingBuffer) {
593    return kEOF;
594  }
595
596  PRUnichar theChar = 0;
597  nsresult  result = Peek(theChar);
598 
599  if (result == kEOF) {
600    // XXX why wouldn't Eof() return kEOF?? --darin
601    return Eof();
602  }
603 
604  nsScannerIterator current = mCurrentPosition;
605  PRBool    done = PR_FALSE;
606  PRBool    skipped = PR_FALSE;
607 
608  while (!done && current != mEndPosition) {
609    switch(theChar) {
610      case '\n':
611      case '\r': ++aNewlinesSkipped;
612      case ' ' :
613      case '\b':
614      case '\t':
615        {
616          skipped = PR_TRUE;
617          PRUnichar thePrevChar = theChar;
618          theChar = (++current != mEndPosition) ? *current : '\0';
619          if ((thePrevChar == '\r' && theChar == '\n') ||
620              (thePrevChar == '\n' && theChar == '\r')) {
621            theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF
622          }
623        }
624        break;
625      default:
626        done = PR_TRUE;
627        break;
628    }
629  }
630
631  if (skipped) {
632    SetPosition(current);
633    if (current == mEndPosition) {
634      result = Eof();
635    }
636  }
637
638  return result;
639}
640
641/**
642 *  Skip over chars as long as they equal given char
643 * 
644 *  @update  gess 3/25/98
645 *  @param   
646 *  @return  error code
647 */
648nsresult nsScanner::SkipOver(PRUnichar aSkipChar){
649
650  if (!mSlidingBuffer) {
651    return kEOF;
652  }
653
654  PRUnichar ch=0;
655  nsresult   result=NS_OK;
656
657  while(NS_OK==result) {
658    result=Peek(ch);
659    if(NS_OK == result) {
660      if(ch!=aSkipChar) {
661        break;
662      }
663      GetChar(ch);
664    }
665    else break;
666  } //while
667  return result;
668
669}
670
671/**
672 *  Skip over chars as long as they're in aSkipSet
673 * 
674 *  @update  gess 3/25/98
675 *  @param   aSkipSet is an ordered string.
676 *  @return  error code
677 */
678nsresult nsScanner::SkipOver(nsString& aSkipSet){
679
680  if (!mSlidingBuffer) {
681    return kEOF;
682  }
683
684  PRUnichar theChar=0;
685  nsresult  result=NS_OK;
686
687  while(NS_OK==result) {
688    result=Peek(theChar);
689    if(NS_OK == result) {
690      PRInt32 pos=aSkipSet.FindChar(theChar);
691      if(kNotFound==pos) {
692        break;
693      }
694      GetChar(theChar);
695    }
696    else break;
697  } //while
698  return result;
699
700}
701
702
703/**
704 *  Skip over chars until they're in aValidSet
705 * 
706 *  @update  gess 3/25/98
707 *  @param   aValid set is an ordered string that
708 *           contains chars you're looking for
709 *  @return  error code
710 */
711nsresult nsScanner::SkipTo(nsString& aValidSet){
712  if (!mSlidingBuffer) {
713    return kEOF;
714  }
715
716  PRUnichar ch=0;
717  nsresult  result=NS_OK;
718
719  while(NS_OK==result) {
720    result=Peek(ch);
721    if(NS_OK == result) {
722      PRInt32 pos=aValidSet.FindChar(ch);
723      if(kNotFound!=pos) {
724        break;
725      }
726      GetChar(ch);
727    }
728    else break;
729  } //while
730  return result;
731}
732
#if 0
// Disabled debugging hooks: each looks for an embedded NUL that is not
// the last character of the string.  The matching branch is empty.
void DoErrTest(nsString& aString) {
  const PRInt32 nulPos = aString.FindChar(0);
  if ((kNotFound < nulPos) && (aString.Length()-1 != nulPos)) {
  }
}

void DoErrTest(nsCString& aString) {
  const PRInt32 nulPos = aString.FindChar(0);
  if ((kNotFound < nulPos) && (aString.Length()-1 != nulPos)) {
  }
}
#endif
750
751/**
752 *  Skip over chars as long as they're in aValidSet
753 * 
754 *  @update  gess 3/25/98
755 *  @param   aValidSet is an ordered string containing the
756 *           characters you want to skip
757 *  @return  error code
758 */
759nsresult nsScanner::SkipPast(nsString& aValidSet){
760  NS_NOTYETIMPLEMENTED("Error: SkipPast not yet implemented.");
761  return NS_OK;
762}
763
764/**
765 *  Consume characters until you did not find the terminal char
766 * 
767 *  @update  gess 3/25/98
768 *  @param   aString - receives new data from stream
769 *  @param   aIgnore - If set ignores ':','-','_','.'
770 *  @return  error code
771 */
772nsresult nsScanner::GetIdentifier(nsString& aString,PRBool allowPunct) {
773
774  if (!mSlidingBuffer) {
775    return kEOF;
776  }
777
778  PRUnichar         theChar=0;
779  nsresult          result=Peek(theChar);
780  nsScannerIterator current, end;
781  PRBool            found=PR_FALSE; 
782 
783  current = mCurrentPosition;
784  end = mEndPosition;
785
786  while(current != end) {
787 
788    theChar=*current;
789    found=PR_FALSE;
790    switch(theChar) {
791      case ':':
792      case '_':
793      case '-':
794      case '.':
795        found=allowPunct;
796        break;
797      default:
798        found = ('a'<=theChar && theChar<='z') ||
799                ('A'<=theChar && theChar<='Z') ||
800                ('0'<=theChar && theChar<='9');
801        break;
802    }
803
804    if(!found) {
805      // If the current character isn't a valid character for
806      // the identifier, we're done. Copy the results into
807      // the string passed in.
808      CopyUnicodeTo(mCurrentPosition, current, aString);
809      break;
810    }
811    ++current;
812  }
813
814  // Drop NULs on the floor since nobody really likes them.
815  while (current != end && !*current) {
816    ++current;
817  }
818
819  SetPosition(current); 
820  if (current == end) {
821    result = Eof();
822  }
823
824  //DoErrTest(aString);
825
826  return result;
827}
828
829/**
830 *  Consume characters until you did not find the terminal char
831 * 
832 *  @update  gess 3/25/98
833 *  @param   aString - receives new data from stream
834 *  @param   allowPunct - If set ignores ':','-','_','.'
835 *  @return  error code
836 */
837nsresult nsScanner::ReadIdentifier(nsString& aString,PRBool allowPunct) {
838
839  if (!mSlidingBuffer) {
840    return kEOF;
841  }
842
843  PRUnichar         theChar=0;
844  nsresult          result=Peek(theChar);
845  nsScannerIterator origin, current, end;
846  PRBool            found=PR_FALSE; 
847
848  origin = mCurrentPosition;
849  current = mCurrentPosition;
850  end = mEndPosition;
851
852  while(current != end) {
853 
854    theChar=*current;
855    found=PR_FALSE;
856    switch(theChar) {
857      case ':':
858      case '_':
859      case '-':
860      case '.':
861        found=allowPunct;
862        break;
863      default:
864        found = ('a'<=theChar && theChar<='z') ||
865                ('A'<=theChar && theChar<='Z') ||
866                ('0'<=theChar && theChar<='9');
867        break;
868    }
869
870    if(!found) {
871      AppendUnicodeTo(mCurrentPosition, current, aString);
872      break;
873    }
874   
875    ++current;
876  }
877 
878  // Drop NULs on the floor since nobody really likes them
879  while (current != end && !*current) {
880    ++current;
881  }
882
883  SetPosition(current);
884  if (current == end) {
885    AppendUnicodeTo(origin, current, aString);
886    return Eof();
887  }
888
889  //DoErrTest(aString);
890
891  return result;
892}
893
894nsresult nsScanner::ReadIdentifier(nsScannerIterator& aStart,
895                                   nsScannerIterator& aEnd,
896                                   PRBool allowPunct) {
897
898  if (!mSlidingBuffer) {
899    return kEOF;
900  }
901
902  PRUnichar         theChar=0;
903  nsresult          result=Peek(theChar);
904  nsScannerIterator origin, current, end;
905  PRBool            found=PR_FALSE; 
906
907  origin = mCurrentPosition;
908  current = mCurrentPosition;
909  end = mEndPosition;
910
911  while(current != end) {
912 
913    theChar=*current;
914    if(theChar) {
915      found=PR_FALSE;
916      switch(theChar) {
917        case ':':
918        case '_':
919        case '-':
920          found=allowPunct;
921          break;
922        default:
923          if(('a'<=theChar) && (theChar<='z'))
924            found=PR_TRUE;
925          else if(('A'<=theChar) && (theChar<='Z'))
926            found=PR_TRUE;
927          else if(('0'<=theChar) && (theChar<='9'))
928            found=PR_TRUE;
929          break;
930      }
931
932      if(!found) {
933        aStart = mCurrentPosition;
934        aEnd = current;
935        break;
936      }
937    }
938    ++current;
939  }
940 
941  SetPosition(current);
942  if (current == end) {
943    aStart = origin;
944    aEnd = current;
945    return Eof();
946  }
947
948  //DoErrTest(aString);
949
950  return result;
951}
952
953/**
954 *  Consume digits
955 * 
956 *  @param   aString - should contain digits
957 *  @return  error code
958 */
959nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {
960
961  if (!mSlidingBuffer) {
962    return kEOF;
963  }
964
965  NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
966
967  PRUnichar         theChar=0;
968  nsresult          result=Peek(theChar);
969  nsScannerIterator origin, current, end;
970
971  origin = mCurrentPosition;
972  current = origin;
973  end = mEndPosition;
974
975  PRBool done = PR_FALSE;
976  while(current != end) {
977    theChar=*current;
978    if(theChar) {
979      done = (theChar < '0' || theChar > '9') &&
980             ((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
981                             (theChar < 'a' || theChar > 'f')
982                             :PR_TRUE);
983      if(done) {
984        AppendUnicodeTo(origin, current, aString);
985        break;
986      }
987    }
988    ++current;
989  }
990
991  SetPosition(current);
992  if (current == end) {
993    AppendUnicodeTo(origin, current, aString);
994    return Eof();
995  }
996
997  //DoErrTest(aString);
998
999  return result;
1000}
1001
1002/**
1003 *  Consume characters until you find the terminal char
1004 * 
1005 *  @update  gess 3/25/98
1006 *  @param   aString receives new data from stream
1007 *  @param   addTerminal tells us whether to append terminal to aString
1008 *  @return  error code
1009 */
1010nsresult nsScanner::ReadWhitespace(nsString& aString,
1011                                   PRInt32& aNewlinesSkipped) {
1012
1013  if (!mSlidingBuffer) {
1014    return kEOF;
1015  }
1016
1017  PRUnichar theChar = 0;
1018  nsresult  result = Peek(theChar);
1019 
1020  if (result == kEOF) {
1021    return Eof();
1022  }
1023 
1024  nsScannerIterator origin, current, end;
1025  PRBool done = PR_FALSE; 
1026
1027  origin = mCurrentPosition;
1028  current = origin;
1029  end = mEndPosition;
1030
1031  while(!done && current != end) {
1032    switch(theChar) {
1033      case '\n':
1034      case '\r': ++aNewlinesSkipped;
1035      case ' ' :
1036      case '\b':
1037      case '\t':
1038        {
1039          PRUnichar thePrevChar = theChar;
1040          theChar = (++current != end) ? *current : '\0';
1041          if ((thePrevChar == '\r' && theChar == '\n') ||
1042              (thePrevChar == '\n' && theChar == '\r')) {
1043            theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
1044          }
1045        }
1046        break;
1047      default:
1048        done = PR_TRUE;
1049        AppendUnicodeTo(origin, current, aString);
1050        break;
1051    }
1052  }
1053
1054  SetPosition(current);
1055  if (current == end) {
1056    AppendUnicodeTo(origin, current, aString);
1057    result = Eof();
1058  }
1059
1060  return result;
1061}
1062
1063nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart,
1064                                   nsScannerIterator& aEnd,
1065                                   PRInt32& aNewlinesSkipped) {
1066
1067  if (!mSlidingBuffer) {
1068    return kEOF;
1069  }
1070
1071  PRUnichar theChar = 0;
1072  nsresult  result = Peek(theChar);
1073 
1074  if (result == kEOF) {
1075    return Eof();
1076  }
1077 
1078  nsScannerIterator origin, current, end;
1079  PRBool done = PR_FALSE; 
1080
1081  origin = mCurrentPosition;
1082  current = origin;
1083  end = mEndPosition;
1084
1085  while(!done && current != end) {
1086    switch(theChar) {
1087      case '\n':
1088      case '\r': ++aNewlinesSkipped;
1089      case ' ' :
1090      case '\b':
1091      case '\t':
1092        {
1093          PRUnichar thePrevChar = theChar;
1094          theChar = (++current != end) ? *current : '\0';
1095          if ((thePrevChar == '\r' && theChar == '\n') ||
1096              (thePrevChar == '\n' && theChar == '\r')) {
1097            theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
1098          }
1099        }
1100        break;
1101      default:
1102        done = PR_TRUE;
1103        aStart = origin;
1104        aEnd = current;
1105        break;
1106    }
1107  }
1108
1109  SetPosition(current);
1110  if (current == end) {
1111    aStart = origin;
1112    aEnd = current;
1113    result = Eof();
1114  }
1115
1116  return result;
1117}
1118
1119/**
1120 *  Consume chars as long as they are <i>in</i> the
1121 *  given validSet of input chars.
1122 * 
1123 *  @update  gess 3/25/98
1124 *  @param   aString will contain the result of this method
1125 *  @param   aValidSet is an ordered string that contains the
1126 *           valid characters
1127 *  @return  error code
1128 */
1129nsresult nsScanner::ReadWhile(nsString& aString,
1130                             nsString& aValidSet,
1131                             PRBool addTerminal){
1132
1133  if (!mSlidingBuffer) {
1134    return kEOF;
1135  }
1136
1137  PRUnichar         theChar=0;
1138  nsresult          result=Peek(theChar);
1139  nsScannerIterator origin, current, end;
1140
1141  origin = mCurrentPosition;
1142  current = origin;
1143  end = mEndPosition;
1144
1145  while(current != end) {
1146 
1147    theChar=*current;
1148    if(theChar) {
1149      PRInt32 pos=aValidSet.FindChar(theChar);
1150      if(kNotFound==pos) {
1151        if(addTerminal)
1152          ++current;
1153        AppendUnicodeTo(origin, current, aString);
1154        break;
1155      }
1156    }
1157    ++current;
1158  }
1159
1160  SetPosition(current);
1161  if (current == end) {
1162    AppendUnicodeTo(origin, current, aString);
1163    return Eof();
1164  }
1165
1166  //DoErrTest(aString);
1167
1168  return result;
1169
1170}
1171
1172/**
1173 *  Consume characters until you encounter one contained in given
1174 *  input set.
1175 * 
1176 *  @update  gess 3/25/98
1177 *  @param   aString will contain the result of this method
1178 *  @param   aTerminalSet is an ordered string that contains
1179 *           the set of INVALID characters
1180 *  @return  error code
1181 */
1182nsresult nsScanner::ReadUntil(nsAString& aString,
1183                              const nsReadEndCondition& aEndCondition,
1184                              PRBool addTerminal)
1185
1186  if (!mSlidingBuffer) {
1187    return kEOF;
1188  }
1189
1190  nsScannerIterator origin, current;
1191  const PRUnichar* setstart = aEndCondition.mChars;
1192  const PRUnichar* setcurrent;
1193
1194  origin = mCurrentPosition;
1195  current = origin;
1196
1197  PRUnichar         theChar=0;
1198  nsresult          result=Peek(theChar);
1199
1200  if (result == kEOF) {
1201    return Eof();
1202  }
1203 
1204  while (current != mEndPosition) {
1205    // Filter out completely wrong characters
1206    // Check if all bits are in the required area
1207    if(!(theChar & aEndCondition.mFilter)) {
1208      // They were. Do a thorough check.
1209
1210      setcurrent = setstart;
1211      while (*setcurrent) {
1212        if (*setcurrent == theChar) {
1213          goto found;
1214        }
1215        ++setcurrent;
1216      }
1217    }
1218   
1219    ++current;
1220    theChar = *current;
1221  }
1222
1223  // If we are here, we didn't find any terminator in the string and
1224  // current = mEndPosition
1225  SetPosition(current);
1226  AppendUnicodeTo(origin, current, aString);
1227  return Eof();
1228
1229found:
1230  if(addTerminal)
1231    ++current;
1232  AppendUnicodeTo(origin, current, aString);
1233  SetPosition(current);
1234
1235  //DoErrTest(aString);
1236
1237  return NS_OK;
1238}
1239
1240nsresult nsScanner::ReadUntil(nsScannerIterator& aStart,
1241                              nsScannerIterator& aEnd,
1242                              const nsReadEndCondition &aEndCondition,
1243                              PRBool addTerminal)
1244{
1245  if (!mSlidingBuffer) {
1246    return kEOF;
1247  }
1248
1249  nsScannerIterator origin, current;
1250  const PRUnichar* setstart = aEndCondition.mChars;
1251  const PRUnichar* setcurrent;
1252
1253  origin = mCurrentPosition;
1254  current = origin;
1255
1256  PRUnichar         theChar=0;
1257  nsresult          result=Peek(theChar);
1258 
1259  if (result == kEOF) {
1260    aStart = aEnd = current;
1261    return Eof();
1262  }
1263 
1264  while (current != mEndPosition) {
1265    // Filter out completely wrong characters
1266    // Check if all bits are in the required area
1267    if(!(theChar & aEndCondition.mFilter)) {
1268      // They were. Do a thorough check.
1269      setcurrent = setstart;
1270      while (*setcurrent) {
1271        if (*setcurrent == theChar) {
1272          goto found;
1273        }
1274      ++setcurrent;
1275      }
1276    }
1277   
1278    ++current;
1279    theChar = *current;
1280  }
1281
1282  // If we are here, we didn't find any terminator in the string and
1283  // current = mEndPosition
1284  SetPosition(current);
1285  aStart = origin;
1286  aEnd = current;
1287  return Eof();
1288
1289 found:
1290  if(addTerminal)
1291    ++current;
1292  aStart = origin;
1293  aEnd = current;
1294  SetPosition(current);
1295
1296  return NS_OK;
1297}
1298
1299/**
1300 *  Consumes chars until you see the given terminalChar
1301 * 
1302 *  @update  gess 3/25/98
1303 *  @param   
1304 *  @return  error code
1305 */
1306nsresult nsScanner::ReadUntil(nsAString& aString,
1307                              PRUnichar aTerminalChar,
1308                              PRBool addTerminal)
1309{
1310  if (!mSlidingBuffer) {
1311    return kEOF;
1312  }
1313
1314  nsScannerIterator origin, current;
1315
1316  origin = mCurrentPosition;
1317  current = origin;
1318
1319  PRUnichar theChar;
1320  Peek(theChar);
1321 
1322  while (current != mEndPosition) {
1323    if (aTerminalChar == theChar) {
1324      if(addTerminal)
1325        ++current;
1326      AppendUnicodeTo(origin, current, aString);
1327      SetPosition(current);
1328      return NS_OK;
1329    }
1330    ++current;
1331    theChar = *current;
1332  }
1333
1334  // If we are here, we didn't find any terminator in the string and
1335  // current = mEndPosition
1336  AppendUnicodeTo(origin, current, aString);
1337  SetPosition(current);
1338  return Eof();
1339
1340}
1341
1342void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
1343{
1344  aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
1345}
1346
1347void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
1348{
1349  aPosition = mCurrentPosition;
1350}
1351
1352void nsScanner::EndReading(nsScannerIterator& aPosition)
1353{
1354  aPosition = mEndPosition;
1355}
1356 
1357void nsScanner::SetPosition(nsScannerIterator& aPosition, PRBool aTerminate, PRBool aReverse)
1358{
1359  if (mSlidingBuffer) {
1360    if (aReverse) {
1361      mCountRemaining += (Distance(aPosition, mCurrentPosition));
1362    }
1363    else {
1364      mCountRemaining -= (Distance(mCurrentPosition, aPosition));
1365    }
1366    mCurrentPosition = aPosition;
1367    if (aTerminate && (mCurrentPosition == mEndPosition)) {
1368      mMarkPosition = mCurrentPosition;
1369      mSlidingBuffer->DiscardPrefix(mCurrentPosition);
1370    }
1371  }
1372}
1373
1374void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
1375                                 PRUnichar aChar)
1376{
1377  if (mSlidingBuffer) {
1378    mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
1379  }
1380}
1381
1382void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
1383                               nsIRequest *aRequest)
1384{
1385  if (nsParser::sParserDataListeners && mParser &&
1386      NS_FAILED(mParser->DataAdded(Substring(aBuf->DataStart(),
1387                                             aBuf->DataEnd()), aRequest))) {
1388    // Don't actually append on failure.
1389
1390    return;
1391  }
1392
1393  if (!mSlidingBuffer) {
1394    mSlidingBuffer = new nsScannerString(aBuf);
1395    mSlidingBuffer->BeginReading(mCurrentPosition);
1396    mMarkPosition = mCurrentPosition;
1397    mSlidingBuffer->EndReading(mEndPosition);
1398    mCountRemaining = aBuf->DataLength();
1399  }
1400  else {
1401    mSlidingBuffer->AppendBuffer(aBuf);
1402    if (mCurrentPosition == mEndPosition) {
1403      mSlidingBuffer->BeginReading(mCurrentPosition);
1404    }
1405    mSlidingBuffer->EndReading(mEndPosition);
1406    mCountRemaining += aBuf->DataLength();
1407  }
1408
1409  if (mFirstNonWhitespacePosition == -1) {
1410    nsScannerIterator iter(mCurrentPosition);
1411    nsScannerIterator end(mEndPosition);
1412
1413    while (iter != end) {
1414      if (!nsCRT::IsAsciiSpace(*iter)) {
1415        mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
1416
1417        break;
1418      }
1419
1420      ++iter;
1421    }
1422  }
1423}
1424
1425void nsScanner::AppendASCIItoBuffer(const char* aData, PRUint32 aLen,
1426                                    nsIRequest *aRequest)
1427{
1428  nsScannerString::Buffer* buf = nsScannerString::AllocBuffer(aLen);
1429  if (buf)
1430  {
1431    LossyConvertEncoding<char, PRUnichar> converter(buf->DataStart());
1432    converter.write(aData, aLen);
1433    converter.write_terminator();
1434    AppendToBuffer(buf, aRequest);
1435  }
1436}
1437
1438/**
1439 *  call this to copy bytes out of the scanner that have not yet been consumed
1440 *  by the tokenization process.
1441 * 
1442 *  @update  gess 5/12/98
1443 *  @param   aCopyBuffer is where the scanner buffer will be copied to
1444 *  @return  nada
1445 */
1446void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
1447  nsScannerIterator start, end;
1448  start = mCurrentPosition;
1449  end = mEndPosition;
1450
1451  CopyUnicodeTo(start, end, aCopyBuffer);
1452}
1453
1454/**
1455 *  Retrieve the name of the file that the scanner is reading from.
1456 *  In some cases, it's just a given name, because the scanner isn't
1457 *  really reading from a file.
1458 * 
1459 *  @update  gess 5/12/98
1460 *  @return 
1461 */
1462nsString& nsScanner::GetFilename(void) {
1463  return mFilename;
1464}
1465
1466/**
1467 *  Conduct self test. Actually, selftesting for this class
1468 *  occurs in the parser selftest.
1469 * 
1470 *  @update  gess 3/25/98
1471 *  @param   
1472 *  @return 
1473 */
1474
1475void nsScanner::SelfTest(void) {
1476#ifdef _DEBUG
1477#endif
1478}
1479
1480
1481
Note: See TracBrowser for help on using the repository browser.