1 | #! /usr/bin/perl |
---|
2 | |
---|
3 | # Program for testing regular expressions with perl to check that PCRE handles |
---|
4 | # them the same. This is the version that supports /8 for UTF-8 testing. It |
---|
5 | # requires at least Perl 5.6. |
---|
6 | |
---|
7 | |
---|
8 | # Function for turning a string into a string of printing chars. There are |
---|
9 | # currently problems with UTF-8 strings; this fudges round them. |
---|
10 | |
---|
11 | sub pchars { |
---|
12 | my($t) = ""; |
---|
13 | |
---|
14 | if ($utf8) |
---|
15 | { |
---|
16 | use utf8; |
---|
17 | @p = unpack('U*', $_[0]); |
---|
18 | foreach $c (@p) |
---|
19 | { |
---|
20 | if ($c >= 32 && $c < 127) { $t .= chr $c; } |
---|
21 | else { $t .= sprintf("\\x{%02x}", $c); } |
---|
22 | } |
---|
23 | } |
---|
24 | |
---|
25 | else |
---|
26 | { |
---|
27 | foreach $c (split(//, $_[0])) |
---|
28 | { |
---|
29 | if (ord $c >= 32 && ord $c < 127) { $t .= $c; } |
---|
30 | else { $t .= sprintf("\\x%02x", ord $c); } |
---|
31 | } |
---|
32 | } |
---|
33 | |
---|
34 | $t; |
---|
35 | } |
---|
36 | |
---|
37 | |
---|
38 | |
---|
39 | # Read lines from named file or stdin and write to named file or stdout; lines |
---|
40 | # consist of a regular expression, in delimiters and optionally followed by |
---|
41 | # options, followed by a set of test data, terminated by an empty line. |
---|
42 | |
---|
43 | # Sort out the input and output files |
---|
44 | |
---|
45 | if (@ARGV > 0) |
---|
46 | { |
---|
47 | open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n"; |
---|
48 | $infile = "INFILE"; |
---|
49 | } |
---|
50 | else { $infile = "STDIN"; } |
---|
51 | |
---|
52 | if (@ARGV > 1) |
---|
53 | { |
---|
54 | open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n"; |
---|
55 | $outfile = "OUTFILE"; |
---|
56 | } |
---|
57 | else { $outfile = "STDOUT"; } |
---|
58 | |
---|
59 | printf($outfile "Perl $] Regular Expressions\n\n"); |
---|
60 | |
---|
61 | # Main loop |
---|
62 | |
---|
63 | NEXT_RE: |
---|
64 | for (;;) |
---|
65 | { |
---|
66 | printf " re> " if $infile eq "STDIN"; |
---|
67 | last if ! ($_ = <$infile>); |
---|
68 | printf $outfile "$_" if $infile ne "STDIN"; |
---|
69 | next if ($_ eq ""); |
---|
70 | |
---|
71 | $pattern = $_; |
---|
72 | |
---|
73 | while ($pattern !~ /^\s*(.).*\1/s) |
---|
74 | { |
---|
75 | printf " > " if $infile eq "STDIN"; |
---|
76 | last if ! ($_ = <$infile>); |
---|
77 | printf $outfile "$_" if $infile ne "STDIN"; |
---|
78 | $pattern .= $_; |
---|
79 | } |
---|
80 | |
---|
81 | chomp($pattern); |
---|
82 | $pattern =~ s/\s+$//; |
---|
83 | |
---|
84 | # The private /+ modifier means "print $' afterwards". |
---|
85 | |
---|
86 | $showrest = ($pattern =~ s/\+(?=[a-z]*$)//); |
---|
87 | |
---|
88 | # The private /8 modifier means "operate in UTF-8". Currently, Perl |
---|
89 | # has bugs that we try to work around using this flag. |
---|
90 | |
---|
91 | $utf8 = ($pattern =~ s/8(?=[a-z]*$)//); |
---|
92 | |
---|
93 | # Check that the pattern is valid |
---|
94 | |
---|
95 | if ($utf8) |
---|
96 | { |
---|
97 | use utf8; |
---|
98 | eval "\$_ =~ ${pattern}"; |
---|
99 | } |
---|
100 | else |
---|
101 | { |
---|
102 | eval "\$_ =~ ${pattern}"; |
---|
103 | } |
---|
104 | |
---|
105 | if ($@) |
---|
106 | { |
---|
107 | printf $outfile "Error: $@"; |
---|
108 | next NEXT_RE; |
---|
109 | } |
---|
110 | |
---|
111 | # If the /g modifier is present, we want to put a loop round the matching; |
---|
112 | # otherwise just a single "if". |
---|
113 | |
---|
114 | $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if"; |
---|
115 | |
---|
116 | # If the pattern is actually the null string, Perl uses the most recently |
---|
117 | # executed (and successfully compiled) regex is used instead. This is a |
---|
118 | # nasty trap for the unwary! The PCRE test suite does contain null strings |
---|
119 | # in places - if they are allowed through here all sorts of weird and |
---|
120 | # unexpected effects happen. To avoid this, we replace such patterns with |
---|
121 | # a non-null pattern that has the same effect. |
---|
122 | |
---|
123 | $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/); |
---|
124 | |
---|
125 | # Read data lines and test them |
---|
126 | |
---|
127 | for (;;) |
---|
128 | { |
---|
129 | printf "data> " if $infile eq "STDIN"; |
---|
130 | last NEXT_RE if ! ($_ = <$infile>); |
---|
131 | chomp; |
---|
132 | printf $outfile "$_\n" if $infile ne "STDIN"; |
---|
133 | |
---|
134 | s/\s+$//; |
---|
135 | s/^\s+//; |
---|
136 | |
---|
137 | last if ($_ eq ""); |
---|
138 | |
---|
139 | $x = eval "\"$_\""; # To get escapes processed |
---|
140 | |
---|
141 | # Empty array for holding results, then do the matching. |
---|
142 | |
---|
143 | @subs = (); |
---|
144 | |
---|
145 | $pushes = "push \@subs,\$&;" . |
---|
146 | "push \@subs,\$1;" . |
---|
147 | "push \@subs,\$2;" . |
---|
148 | "push \@subs,\$3;" . |
---|
149 | "push \@subs,\$4;" . |
---|
150 | "push \@subs,\$5;" . |
---|
151 | "push \@subs,\$6;" . |
---|
152 | "push \@subs,\$7;" . |
---|
153 | "push \@subs,\$8;" . |
---|
154 | "push \@subs,\$9;" . |
---|
155 | "push \@subs,\$10;" . |
---|
156 | "push \@subs,\$11;" . |
---|
157 | "push \@subs,\$12;" . |
---|
158 | "push \@subs,\$13;" . |
---|
159 | "push \@subs,\$14;" . |
---|
160 | "push \@subs,\$15;" . |
---|
161 | "push \@subs,\$16;" . |
---|
162 | "push \@subs,\$'; }"; |
---|
163 | |
---|
164 | if ($utf8) |
---|
165 | { |
---|
166 | use utf8; |
---|
167 | eval "${cmd} (\$x =~ ${pattern}) {" . $pushes; |
---|
168 | } |
---|
169 | else |
---|
170 | { |
---|
171 | eval "${cmd} (\$x =~ ${pattern}) {" . $pushes; |
---|
172 | } |
---|
173 | |
---|
174 | if ($@) |
---|
175 | { |
---|
176 | printf $outfile "Error: $@\n"; |
---|
177 | next NEXT_RE; |
---|
178 | } |
---|
179 | elsif (scalar(@subs) == 0) |
---|
180 | { |
---|
181 | printf $outfile "No match\n"; |
---|
182 | } |
---|
183 | else |
---|
184 | { |
---|
185 | while (scalar(@subs) != 0) |
---|
186 | { |
---|
187 | printf $outfile (" 0: %s\n", &pchars($subs[0])); |
---|
188 | printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest; |
---|
189 | $last_printed = 0; |
---|
190 | for ($i = 1; $i <= 16; $i++) |
---|
191 | { |
---|
192 | if (defined $subs[$i]) |
---|
193 | { |
---|
194 | while ($last_printed++ < $i-1) |
---|
195 | { printf $outfile ("%2d: <unset>\n", $last_printed); } |
---|
196 | printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i])); |
---|
197 | $last_printed = $i; |
---|
198 | } |
---|
199 | } |
---|
200 | splice(@subs, 0, 18); |
---|
201 | } |
---|
202 | } |
---|
203 | } |
---|
204 | } |
---|
205 | |
---|
206 | printf $outfile "\n"; |
---|
207 | |
---|
208 | # End |
---|