source: trunk/third/glib2/tests/gen-casemap-txt.pl @ 18159

Revision 18159, 5.8 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18158, which included commits to RCS files with non-trunk default branches.
  • Property svn:executable set to *
Line 
1#! /usr/bin/perl -w
2
3#    Copyright (C) 1998, 1999 Tom Tromey
4#    Copyright (C) 2001 Red Hat Software
5
6#    This program is free software; you can redistribute it and/or modify
7#    it under the terms of the GNU General Public License as published by
8#    the Free Software Foundation; either version 2, or (at your option)
9#    any later version.
10
11#    This program is distributed in the hope that it will be useful,
12#    but WITHOUT ANY WARRANTY; without even the implied warranty of
13#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#    GNU General Public License for more details.
15
16#    You should have received a copy of the GNU General Public License
17#    along with this program; if not, write to the Free Software
18#    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19#    02111-1307, USA.
20
21# gen-casemap-test.pl - Generate test cases for case mapping from Unicode data.
22# See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
23# I consider the output of this program to be unrestricted.  Use it as
24# you will.
25
26use utf8;
27
28if (@ARGV != 3) {
29    $0 =~ s@.*/@@;
30    die "Usage: $0 UNICODE-VERSION UnicodeData.txt SpecialCasing.txt\n";
31}
32 
33use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME $CASE_CODE $CASE_LOWER $CASE_TITLE $CASE_UPPER $CASE_CONDITION);
34
35# Names of fields in Unicode data table.
36$CODE = 0;
37$NAME = 1;
38$CATEGORY = 2;
39$COMBINING_CLASSES = 3;
40$BIDI_CATEGORY = 4;
41$DECOMPOSITION = 5;
42$DECIMAL_VALUE = 6;
43$DIGIT_VALUE = 7;
44$NUMERIC_VALUE = 8;
45$MIRRORED = 9;
46$OLD_NAME = 10;
47$COMMENT = 11;
48$UPPER = 12;
49$LOWER = 13;
50$TITLE = 14;
51
52# Names of fields in the SpecialCasing table
53$CASE_CODE = 0;
54$CASE_LOWER = 1;
55$CASE_TITLE = 2;
56$CASE_UPPER = 3;
57$CASE_CONDITION = 4;
58
59my @upper;
60my @title;
61my @lower;
62
63open (INPUT, "< $ARGV[1]") || exit 1;
64
65$last_code = -1;
66while (<INPUT>)
67{
68    chop;
69    @fields = split (';', $_, 30);
70    if ($#fields != 14)
71    {
72        printf STDERR ("Entry for $fields[$CODE] has wrong number of fields (%d)\n", $#fields);
73    }
74
75    $code = hex ($fields[$CODE]);
76
77    last if ($code > 0xFFFF); # ignore characters out of the basic plane
78
79    if ($code > $last_code + 1)
80    {
81        # Found a gap.
82        if ($fields[$NAME] =~ /Last>/)
83        {
84            # Fill the gap with the last character read,
85            # since this was a range specified in the char database
86            @gfields = @fields;
87        }
88        else
89        {
90            # The gap represents undefined characters.  Only the type
91            # matters.
92            @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
93                        '', '', '', '');
94        }
95        for (++$last_code; $last_code < $code; ++$last_code)
96        {
97            $gfields{$CODE} = sprintf ("%04x", $last_code);
98            &process_one ($last_code, @gfields);
99        }
100    }
101    &process_one ($code, @fields);
102    $last_code = $code;
103}
104
105close INPUT;
106
107open (INPUT, "< $ARGV[2]") || exit 1;
108
109while (<INPUT>)
110{
111    my $code;
112   
113    chop;
114
115    next if /^#/;
116    next if /^\s*$/;
117
118    s/\s*#.*//;
119
120    @fields = split ('\s*;\s*', $_, 30);
121
122    $raw_code = $fields[$CASE_CODE];
123    $code = hex ($raw_code);
124
125    if ($#fields != 4 && $#fields != 5)
126    {
127        printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields);
128        next;
129    }
130
131    if (defined $fields[5]) {
132        # Ignore conditional special cases - we'll handle them manually
133        next;
134    }
135
136    $upper[$code] = &make_hex ($fields[$CASE_UPPER]);
137    $lower[$code] = &make_hex ($fields[$CASE_LOWER]);
138    $title[$code] = &make_hex ($fields[$CASE_TITLE]);
139}
140
141close INPUT;
142
143print <<EOT;
144# Test cases generated from Unicode $ARGV[0] data
145# by gen-case-tests.pl. Do not edit.
146#
147# Some special hand crafted tests
148#
149tr_TR\ti\ti\t\x{0130}\t\x{0130}\t# i => LATIN CAPITAL LETTER I WITH DOT ABOVE
150tr_TR\tI\t\x{0131}\tI\tI\t# I => LATIN SMALL LETTER DOTLESS I
151# Test reordering of YPOGEGRAMMENI across other accents
152\t\x{03b1}\x{0345}\x{0314}\t\x{03b1}\x{0345}\x{314}\t\x{0391}\x{0345}\x{0314}\t\x{0391}\x{0314}\x{0399}\t
153\t\x{03b1}\x{0314}\x{0345}\t\x{03b1}\x{314}\x{0345}\t\x{0391}\x{0314}\x{0345}\t\x{0391}\x{0314}\x{0399}\t
154# Handling of final and nonfinal sigma
155        ΜΆΙΟΣ   μάιος   Μάιος   ΜΆΙΟΣ   
156        ΜΆΙΟΣ   μάιος   Μάιος   ΜΆΙΟΣ   
157        ΣΙΓΜΑ   σιγμα   Σιγμα   ΣΙΓΜΑ   
158# Lithuanian rule of i followed by letter with dot. Not at all sure
159# about the titlecase part here
160lt_LT\ti\x{117}\ti\x{117}\tIe\tIE\t
161lt_LT\tie\x{307}\tie\x{307}\tIe\tIE\t
162#
163# Now the automatic tests
164#
165EOT
166&print_tests;
167
168exit 0;
169
170# Process a single character.
171sub process_one
172{
173    my ($code, @fields) = @_;
174
175    my $type =  $fields[$CATEGORY];
176    if ($type eq 'Ll')
177    {
178        $upper[$code] = make_hex ($fields[$UPPER]);
179        $lower[$code] = pack ("U", $code);
180        $title[$code] = make_hex ($fields[$TITLE]);
181    }
182    elsif ($type eq 'Lu')
183    {
184        $lower[$code] = make_hex ($fields[$LOWER]);
185        $upper[$code] = pack ("U", $code);
186        $title[$code] = make_hex ($fields[$TITLE]);
187    }
188
189    if ($type eq 'Lt')
190    {
191        $upper[$code] = make_hex ($fields[$UPPER]);
192        $lower[$code] = pack ("U", hex ($fields[$LOWER]));
193        $title[$code] = make_hex ($fields[$LOWER]);
194    }
195}
196
197sub print_tests
198{
199    for ($i = 0; $i < 0xffff; $i++) {
200        if ($i == 0x3A3) {
201            # Greek sigma needs special tests
202            next;
203        }
204       
205        my $lower = $lower[$i];
206        my $title = $title[$i];
207        my $upper = $upper[$i];
208
209        if (defined $upper || defined $lower || defined $title) {
210            printf "\t%s\t%s\t%s\t%s\t# %4X\n",
211                    pack ("U", $i),
212                    (defined $lower ? $lower : ""),
213                    (defined $title ? $title : ""),
214                    (defined $upper ? $upper : ""),
215                    $i;
216        }
217    }
218}
219
220sub make_hex
221{
222    my $codes = shift;
223
224    $codes =~ s/^\s+//;
225    $codes =~ s/\s+$//;
226
227    if ($codes eq "0" || $codes eq "") {
228        return "";
229    } else {
230        return pack ("U*", map { hex ($_) } split /\s+/, $codes);
231    }
232}
Note: See TracBrowser for help on using the repository browser.