source: trunk/third/perl/utf8.h @ 14545

Revision 14545, 1.9 KB checked in by ghudson, 25 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r14544, which included commits to RCS files with non-trunk default branches.
Line 
1/*    utf8.h
2 *
3 *    Copyright (c) 1998-2000, Larry Wall
4 *
5 *    You may distribute under the terms of either the GNU General Public
6 *    License or the Artistic License, as specified in the README file.
7 *
8 */
9
10START_EXTERN_C
11
12#ifdef DOINIT
13EXTCONST unsigned char PL_utf8skip[] = {
141,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */
151,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */
161,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */
171,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */
181,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* bogus */
191,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* bogus */
202,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* scripts */
213,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,     /* cjk etc. */
227,13, /* Perl extended (not UTF-8).  Up to 72bit allowed (64-bit + reserved). */
23};
24#else
25EXTCONST unsigned char PL_utf8skip[];
26#endif
27
28END_EXTERN_C
29
/* Upper bound on how wide a single UTF8 encoded character can become;
 * equal to the largest entry in PL_utf8skip. */
#define UTF8_MAXLEN     13
31
/* Disabled: #define IN_UTF8 (PL_curcop->op_private & HINT_UTF8) */

/* Nonzero when the HINT_BYTE bit is set in PL_curcop->op_private. */
#define IN_BYTE         (PL_curcop->op_private & HINT_BYTE)

/* True when SvUTF8(sv) holds and IN_BYTE is not in effect. */
#define DO_UTF8(sv)     (SvUTF8(sv) && !IN_BYTE)
35
/*
 * UTF8SKIP(s): look up PL_utf8skip using the byte that s points at.
 *
 * The argument is parenthesized before the cast and dereference so that
 * a pointer expression such as UTF8SKIP(s + 1) indexes by the byte at
 * s + 1, not by (*s) + 1.  s is evaluated exactly once.
 */
#define UTF8SKIP(s) PL_utf8skip[*(const U8 *)(s)]
37
38/*
39 * Note: we try to be careful never to call the isXXX_utf8() functions
40 * unless we're pretty sure we've seen the beginning of a UTF-8 character
41 * (that is, the two high bits are set).  Otherwise we risk loading in the
42 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
43 */
/*
 * isIDFIRST_lazy_if(p,c): when c is false, or the byte at p is below
 * 0xc0, classify *p with isIDFIRST(); otherwise hand p to
 * isIDFIRST_utf8().
 *
 * Both arguments are parenthesized wherever they are used so that a
 * compound expression (e.g. "a || b" for c, or "s + 1" for p) is
 * negated, cast and dereferenced as a whole.  p may be evaluated more
 * than once, so it should not have side effects.
 */
#define isIDFIRST_lazy_if(p,c) ((!(c) || (*((U8*)(p)) < 0xc0)) \
                                ? isIDFIRST(*(p)) \
                                : isIDFIRST_utf8((U8*)(p)))
/*
 * isALNUM_lazy_if(p,c): when c is false, or the byte at p is below
 * 0xc0, classify *p with isALNUM(); otherwise hand p to isALNUM_utf8().
 *
 * Both arguments are parenthesized wherever they are used so that a
 * compound expression (e.g. "a || b" for c, or "s + 1" for p) is
 * negated, cast and dereferenced as a whole.  p may be evaluated more
 * than once, so it should not have side effects.
 */
#define isALNUM_lazy_if(p,c)   ((!(c) || (*((U8*)(p)) < 0xc0)) \
                                ? isALNUM(*(p)) \
                                : isALNUM_utf8((U8*)(p)))
/* Shorthands for the *_lazy_if() forms with the condition argument
 * fixed to 1. */
#define isIDFIRST_lazy(p)   isIDFIRST_lazy_if(p,1)
#define isALNUM_lazy(p)     isALNUM_lazy_if(p,1)
Note: See TracBrowser for help on using the repository browser.