source: trunk/third/mozilla/htmlparser/public/nsScannerString.h @ 20551

Revision 20551, 17.4 KB checked in by rbasch, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20550, which included commits to RCS files with non-trunk default branches.
Line 
1/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* vim:set ts=2 sw=2 sts=2 et cindent: */
3/* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * The Original Code is Mozilla.
17 *
18 * The Initial Developer of the Original Code is IBM Corporation.
19 * Portions created by IBM Corporation are Copyright (C) 2003
20 * IBM Corporation. All Rights Reserved.
21 *
22 * Contributor(s):
23 *   Darin Fisher <darin@meer.net>
24 *
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
36 *
37 * ***** END LICENSE BLOCK ***** */
38
39#ifndef nsScannerString_h___
40#define nsScannerString_h___
41
42#include "nsString.h"
43#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
44#include "prclist.h"
45
46
  /**
   * NOTE: nsScannerString (and the other classes defined in this file) are
   * not related to nsAString or any of the other xpcom/string classes.
   *
   * nsScannerString is based on the nsSlidingString implementation that used
   * to live in xpcom/string.  Now that nsAString is limited to representing
   * only single fragment strings, nsSlidingString can no longer be used.
   *
   * An advantage of this design is that it does not employ any virtual
   * functions.
   *
   * This file uses SCC-style indenting in deference to the nsSlidingString
   * code from which this code is derived ;-)
   */
61
// Forward declarations of the scanner string classes defined in this header.
class nsScannerIterator;
class nsScannerSubstring;
class nsScannerString;
65
66
  /**
   * nsScannerBufferList
   *
   * This class maintains a list of heap-allocated Buffer objects.  The buffers
   * are maintained in a circular linked list.  Each buffer has a usage count
   * that is decremented by the owning nsScannerSubstring.
   *
   * The buffer list itself is reference counted.  This allows the buffer list
   * to be shared by multiple nsScannerSubstring objects.  The reference
   * counting is not threadsafe; thread safety is not a requirement here.
   *
   * When an nsScannerSubstring releases its reference to a buffer list, it
   * decrements the usage count of the first buffer in the buffer list that it
   * was referencing.  It informs the buffer list that it can discard buffers
   * starting at that prefix.  The buffer list will do so if the usage count of
   * that buffer is 0 and if it is the first buffer in the list.  It will
   * continue to prune buffers starting from the front of the buffer list until
   * it finds a buffer that has a usage count that is non-zero.
   */
86class nsScannerBufferList
87  {
88    public:
89
90        /**
91         * Buffer objects are directly followed by a data segment.  The start
92         * of the data segment is determined by increment the |this| pointer
93         * by 1 unit.
94         */
95      class Buffer : public PRCList
96        {
97          public:
98
99            void IncrementUsageCount() { ++mUsageCount; }
100            void DecrementUsageCount() { --mUsageCount; }
101
102            PRBool IsInUse() const { return mUsageCount != 0; }
103
104            const PRUnichar* DataStart() const { return (const PRUnichar*) (this+1); }
105                  PRUnichar* DataStart()       { return (      PRUnichar*) (this+1); }
106
107            const PRUnichar* DataEnd() const { return mDataEnd; }
108                  PRUnichar* DataEnd()       { return mDataEnd; }
109
110            const Buffer* Next() const { return NS_STATIC_CAST(const Buffer*, next); }
111                  Buffer* Next()       { return NS_STATIC_CAST(      Buffer*, next); }
112
113            const Buffer* Prev() const { return NS_STATIC_CAST(const Buffer*, prev); }
114                  Buffer* Prev()       { return NS_STATIC_CAST(      Buffer*, prev); }
115
116            PRUint32 DataLength() const { return mDataEnd - DataStart(); }
117            void SetDataLength(PRUint32 len) { mDataEnd = DataStart() + len; }
118
119          private:
120
121            friend class nsScannerBufferList;
122
123            PRInt32    mUsageCount;
124            PRUnichar* mDataEnd;
125        };
126
127        /**
128         * Position objects serve as lightweight pointers into a buffer list.
129         * The mPosition member must be contained with mBuffer->DataStart()
130         * and mBuffer->DataEnd().
131         */
132      class Position
133        {
134          public:
135
136            Position() {}
137           
138            Position( Buffer* buffer, PRUnichar* position )
139              : mBuffer(buffer)
140              , mPosition(position)
141              {}
142           
143            inline
144            Position( const nsScannerIterator& aIter );
145
146            inline
147            Position& operator=( const nsScannerIterator& aIter );
148
149            static size_t Distance( const Position& p1, const Position& p2 );
150
151            Buffer*    mBuffer;
152            PRUnichar* mPosition;
153        };
154
155      static Buffer* AllocBufferFromString( const nsAString& );
156      static Buffer* AllocBuffer( PRUint32 capacity ); // capacity = number of chars
157
158      nsScannerBufferList( Buffer* buf )
159        : mRefCnt(0)
160        {
161          PR_INIT_CLIST(&mBuffers);
162          PR_APPEND_LINK(buf, &mBuffers);
163        }
164
165      void  AddRef()  { ++mRefCnt; }
166      void  Release() { if (--mRefCnt == 0) delete this; }
167
168      void  Append( Buffer* buf ) { PR_APPEND_LINK(buf, &mBuffers); }
169      void  InsertAfter( Buffer* buf, Buffer* prev ) { PR_INSERT_AFTER(buf, prev); }
170      void  SplitBuffer( const Position& );
171      void  DiscardUnreferencedPrefix( Buffer* );
172
173            Buffer* Head()       { return NS_STATIC_CAST(      Buffer*, PR_LIST_HEAD(&mBuffers)); }
174      const Buffer* Head() const { return NS_STATIC_CAST(const Buffer*, PR_LIST_HEAD(&mBuffers)); }
175
176            Buffer* Tail()       { return NS_STATIC_CAST(      Buffer*, PR_LIST_TAIL(&mBuffers)); }
177      const Buffer* Tail() const { return NS_STATIC_CAST(const Buffer*, PR_LIST_TAIL(&mBuffers)); }
178
179    private:
180
181      friend class nsScannerSubstring;
182
183      ~nsScannerBufferList() { ReleaseAll(); }
184      void ReleaseAll();
185
186      PRInt32 mRefCnt;
187      PRCList mBuffers;
188  };
189
190
  /**
   * nsScannerFragment represents a "slice" of a Buffer object.
   */
194struct nsScannerFragment
195  {
196    typedef nsScannerBufferList::Buffer Buffer;
197
198    const Buffer*    mBuffer;
199    const PRUnichar* mFragmentStart;
200    const PRUnichar* mFragmentEnd;
201  };
202
203
  /**
   * nsScannerSubstring is the base class for nsScannerString.  It provides
   * access to iterators and methods to bind the substring to another
   * substring or nsAString instance.
   *
   * This class owns the buffer list.
   */
211class nsScannerSubstring
212  {
213    public:
214      typedef nsScannerBufferList::Buffer      Buffer;
215      typedef nsScannerBufferList::Position    Position;
216      typedef PRUint32                         size_type;
217
218      nsScannerSubstring();
219      nsScannerSubstring( const nsAString& s );
220
221      ~nsScannerSubstring();
222
223      nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
224      nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
225
226      size_type Length() const { return mLength; }
227
228      PRInt32 CountChar( PRUnichar ) const;
229
230      void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
231      void Rebind( const nsAString& );
232
233      const nsString& AsString() const;
234
235      PRBool GetNextFragment( nsScannerFragment& ) const;
236      PRBool GetPrevFragment( nsScannerFragment& ) const;
237
238      static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
239      static inline Buffer* AllocBuffer( size_type aCapacity )             { return nsScannerBufferList::AllocBuffer(aCapacity); }
240
241    protected:
242
243      void acquire_ownership_of_buffer_list() const
244        {
245          mBufferList->AddRef();
246          mStart.mBuffer->IncrementUsageCount();
247        }
248
249      void release_ownership_of_buffer_list()
250        {
251          if (mBufferList)
252            {
253              mStart.mBuffer->DecrementUsageCount();
254              mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
255              mBufferList->Release();
256            }
257        }
258     
259      void init_range_from_buffer_list()
260        {
261          mStart.mBuffer = mBufferList->Head();
262          mStart.mPosition = mStart.mBuffer->DataStart();
263
264          mEnd.mBuffer = mBufferList->Tail();
265          mEnd.mPosition = mEnd.mBuffer->DataEnd();
266
267          mLength = Position::Distance(mStart, mEnd);
268        }
269
270      Position             mStart;
271      Position             mEnd;
272      nsScannerBufferList *mBufferList;
273      size_type            mLength;
274
275      // these fields are used to implement AsString
276      nsString             mFlattenedRep;
277      PRBool               mIsDirty;
278  };
279
280
  /**
   * nsScannerString provides methods to grow and modify a buffer list.
   */
284class nsScannerString : public nsScannerSubstring
285  {
286    public:
287
288      nsScannerString( Buffer* );
289
290        // you are giving ownership to the string, it takes and keeps your
291        // buffer, deleting it when done.
292        // Use AllocBuffer or AllocBufferFromString to create a Buffer object
293        // for use with this function.
294      void AppendBuffer( Buffer* );
295
296      void DiscardPrefix( const nsScannerIterator& );
297        // any other way you want to do this?
298
299      void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
300      void ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar);
301  };
302
303
  /**
   * nsScannerIterator works just like nsReadingIterator<CharT> except that
   * it knows how to iterate over a list of scanner buffers.
   */
308class nsScannerIterator
309  {
310    public:
311      typedef nsScannerIterator             self_type;
312      typedef ptrdiff_t                     difference_type;
313      typedef PRUnichar                     value_type;
314      typedef const PRUnichar*              pointer;
315      typedef const PRUnichar&              reference;
316      typedef nsScannerSubstring::Buffer    Buffer;
317
318    protected:
319
320      nsScannerFragment         mFragment;
321      const PRUnichar*          mPosition;
322      const nsScannerSubstring* mOwner;
323
324      friend class nsScannerSubstring;
325
326    public:
327      nsScannerIterator() {}
328      // nsScannerIterator( const nsScannerIterator& );             // auto-generated copy-constructor OK
329      // nsScannerIterator& operator=( const nsScannerIterator& );  // auto-generated copy-assignment operator OK
330
331      inline void normalize_forward();
332      inline void normalize_backward();
333
334      pointer get() const
335        {
336          return mPosition;
337        }
338     
339      PRUnichar operator*() const
340        {
341          return *get();
342        }
343
344      const nsScannerFragment& fragment() const
345        {
346          return mFragment;
347        }
348
349      const Buffer* buffer() const
350        {
351          return mFragment.mBuffer;
352        }
353
354      self_type& operator++()
355        {
356          ++mPosition;
357          normalize_forward();
358          return *this;
359        }
360
361      self_type operator++( int )
362        {
363          self_type result(*this);
364          ++mPosition;
365          normalize_forward();
366          return result;
367        }
368
369      self_type& operator--()
370        {
371          normalize_backward();
372          --mPosition;
373          return *this;
374        }
375
376      self_type operator--( int )
377        {
378          self_type result(*this);
379          normalize_backward();
380          --mPosition;
381          return result;
382        }
383
384      difference_type size_forward() const
385        {
386          return mFragment.mFragmentEnd - mPosition;
387        }
388
389      difference_type size_backward() const
390        {
391          return mPosition - mFragment.mFragmentStart;
392        }
393
394      self_type& advance( difference_type n )
395        {
396          while ( n > 0 )
397            {
398              difference_type one_hop = NS_MIN(n, size_forward());
399
400              NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
401                // perhaps I should |break| if |!one_hop|?
402
403              mPosition += one_hop;
404              normalize_forward();
405              n -= one_hop;
406            }
407
408          while ( n < 0 )
409            {
410              normalize_backward();
411              difference_type one_hop = NS_MAX(n, -size_backward());
412
413              NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
414                // perhaps I should |break| if |!one_hop|?
415
416              mPosition += one_hop;
417              n -= one_hop;
418            }
419
420          return *this;
421        }
422  };
423
424
425inline
426PRBool
427SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
428  {
429    return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
430  }
431
432
  /**
   * This traits specialization is needed in order to make use of the
   * methods in nsAlgorithm.h.
   */
436NS_SPECIALIZE_TEMPLATE
437struct nsCharSourceTraits<nsScannerIterator>
438  {
439    typedef nsScannerIterator::difference_type difference_type;
440
441    static
442    PRUint32
443    readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
444      {
445        return PRUint32(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
446      }
447
448    static
449    const nsScannerIterator::value_type*
450    read( const nsScannerIterator& iter )
451      {
452        return iter.get();
453      }
454   
455    static
456    void
457    advance( nsScannerIterator& s, difference_type n )
458      {
459        s.advance(n);
460      }
461  };
462
463
  /**
   * Inline method definitions follow.
   */
467
468inline
469void
470nsScannerIterator::normalize_forward()
471  {
472    while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
473      mPosition = mFragment.mFragmentStart;
474  }
475
476inline
477void
478nsScannerIterator::normalize_backward()
479  {
480    while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
481      mPosition = mFragment.mFragmentEnd;
482  }
483
484inline
485PRBool
486operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
487  {
488    return lhs.get() == rhs.get();
489  }
490
491inline
492PRBool
493operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
494  {
495    return lhs.get() != rhs.get();
496  }
497
498
499inline
500nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
501  : mBuffer(NS_CONST_CAST(Buffer*, aIter.buffer()))
502  , mPosition(NS_CONST_CAST(PRUnichar*, aIter.get()))
503  {}
504
505inline
506nsScannerBufferList::Position&
507nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
508  {
509    mBuffer   = NS_CONST_CAST(Buffer*, aIter.buffer());
510    mPosition = NS_CONST_CAST(PRUnichar*, aIter.get());
511    return *this;
512  }
513
514
  /**
   * scanner string utils
   *
   * These functions mimic the API provided by nsReadableUtils in xpcom/string.
   * Here we provide only the functions that the htmlparser module needs.
   */
521
522inline
523size_t
524Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
525  {
526    typedef nsScannerBufferList::Position Position;
527    return Position::Distance(Position(aStart), Position(aEnd));
528  }
529
530void
531CopyUnicodeTo( const nsScannerIterator& aSrcStart,
532               const nsScannerIterator& aSrcEnd,
533               nsAString& aDest );
534
535inline
536void
537CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
538  {
539    nsScannerIterator begin, end;
540    CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
541  }
542
543void
544AppendUnicodeTo( const nsScannerIterator& aSrcStart,
545                 const nsScannerIterator& aSrcEnd,
546                 nsAString& aDest );
547
548inline
549void
550AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
551  {
552    nsScannerIterator begin, end;
553    AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
554  }
555
556PRBool
557FindCharInReadable( PRUnichar aChar,
558                    nsScannerIterator& aStart,
559                    const nsScannerIterator& aEnd );
560
561PRBool
562FindInReadable( const nsAString& aPattern,
563                nsScannerIterator& aStart,
564                nsScannerIterator& aEnd,
565                const nsStringComparator& = nsDefaultStringComparator() );
566
567PRBool
568RFindInReadable( const nsAString& aPattern,
569                 nsScannerIterator& aStart,
570                 nsScannerIterator& aEnd,
571                 const nsStringComparator& = nsDefaultStringComparator() );
572
573inline
574PRBool
575CaseInsensitiveFindInReadable( const nsAString& aPattern,
576                               nsScannerIterator& aStart,
577                               nsScannerIterator& aEnd )
578  {
579    return FindInReadable(aPattern, aStart, aEnd,
580                          nsCaseInsensitiveStringComparator());
581  }
582
583#endif // !defined(nsScannerString_h___)
Note: See TracBrowser for help on using the repository browser.