1 | #! /usr/bin/perl |
---|
2 | |
---|
3 | # Program for testing regular expressions with perl to check that PCRE handles |
---|
4 | # them the same. |
---|
5 | |
---|
6 | |
---|
7 | # Function for turning a string into a string of printing chars |
---|
8 | |
---|
9 | sub pchars { |
---|
10 | my($t) = ""; |
---|
11 | |
---|
12 | foreach $c (split(//, $_[0])) |
---|
13 | { |
---|
14 | if (ord $c >= 32 && ord $c < 127) { $t .= $c; } |
---|
15 | else { $t .= sprintf("\\x%02x", ord $c); } |
---|
16 | } |
---|
17 | $t; |
---|
18 | } |
---|
19 | |
---|
20 | |
---|
21 | |
---|
22 | # Read lines from named file or stdin and write to named file or stdout; lines |
---|
23 | # consist of a regular expression, in delimiters and optionally followed by |
---|
24 | # options, followed by a set of test data, terminated by an empty line. |
---|
25 | |
---|
26 | # Sort out the input and output files |
---|
27 | |
---|
28 | if (@ARGV > 0) |
---|
29 | { |
---|
30 | open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n"; |
---|
31 | $infile = "INFILE"; |
---|
32 | } |
---|
33 | else { $infile = "STDIN"; } |
---|
34 | |
---|
35 | if (@ARGV > 1) |
---|
36 | { |
---|
37 | open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n"; |
---|
38 | $outfile = "OUTFILE"; |
---|
39 | } |
---|
40 | else { $outfile = "STDOUT"; } |
---|
41 | |
---|
42 | printf($outfile "Perl $] Regular Expressions\n\n"); |
---|
43 | |
---|
44 | # Main loop |
---|
45 | |
---|
46 | NEXT_RE: |
---|
47 | for (;;) |
---|
48 | { |
---|
49 | printf " re> " if $infile eq "STDIN"; |
---|
50 | last if ! ($_ = <$infile>); |
---|
51 | printf $outfile "$_" if $infile ne "STDIN"; |
---|
52 | next if ($_ eq ""); |
---|
53 | |
---|
54 | $pattern = $_; |
---|
55 | |
---|
56 | while ($pattern !~ /^\s*(.).*\1/s) |
---|
57 | { |
---|
58 | printf " > " if $infile eq "STDIN"; |
---|
59 | last if ! ($_ = <$infile>); |
---|
60 | printf $outfile "$_" if $infile ne "STDIN"; |
---|
61 | $pattern .= $_; |
---|
62 | } |
---|
63 | |
---|
64 | chomp($pattern); |
---|
65 | $pattern =~ s/\s+$//; |
---|
66 | |
---|
67 | # The private /+ modifier means "print $' afterwards". We use it |
---|
68 | # only on the end of patterns to make it easy to chop off here. |
---|
69 | |
---|
70 | $showrest = ($pattern =~ s/\+(?=[a-z]*$)//); |
---|
71 | |
---|
72 | # Check that the pattern is valid |
---|
73 | |
---|
74 | eval "\$_ =~ ${pattern}"; |
---|
75 | if ($@) |
---|
76 | { |
---|
77 | printf $outfile "Error: $@"; |
---|
78 | next NEXT_RE; |
---|
79 | } |
---|
80 | |
---|
81 | # If the /g modifier is present, we want to put a loop round the matching; |
---|
82 | # otherwise just a single "if". |
---|
83 | |
---|
84 | $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if"; |
---|
85 | |
---|
86 | # If the pattern is actually the null string, Perl uses the most recently |
---|
87 | # executed (and successfully compiled) regex is used instead. This is a |
---|
88 | # nasty trap for the unwary! The PCRE test suite does contain null strings |
---|
89 | # in places - if they are allowed through here all sorts of weird and |
---|
90 | # unexpected effects happen. To avoid this, we replace such patterns with |
---|
91 | # a non-null pattern that has the same effect. |
---|
92 | |
---|
93 | $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/); |
---|
94 | |
---|
95 | # Read data lines and test them |
---|
96 | |
---|
97 | for (;;) |
---|
98 | { |
---|
99 | printf "data> " if $infile eq "STDIN"; |
---|
100 | last NEXT_RE if ! ($_ = <$infile>); |
---|
101 | chomp; |
---|
102 | printf $outfile "$_\n" if $infile ne "STDIN"; |
---|
103 | |
---|
104 | s/\s+$//; |
---|
105 | s/^\s+//; |
---|
106 | |
---|
107 | last if ($_ eq ""); |
---|
108 | |
---|
109 | $x = eval "\"$_\""; # To get escapes processed |
---|
110 | |
---|
111 | # Empty array for holding results, then do the matching. |
---|
112 | |
---|
113 | @subs = (); |
---|
114 | |
---|
115 | eval "${cmd} (\$x =~ ${pattern}) {" . |
---|
116 | "push \@subs,\$&;" . |
---|
117 | "push \@subs,\$1;" . |
---|
118 | "push \@subs,\$2;" . |
---|
119 | "push \@subs,\$3;" . |
---|
120 | "push \@subs,\$4;" . |
---|
121 | "push \@subs,\$5;" . |
---|
122 | "push \@subs,\$6;" . |
---|
123 | "push \@subs,\$7;" . |
---|
124 | "push \@subs,\$8;" . |
---|
125 | "push \@subs,\$9;" . |
---|
126 | "push \@subs,\$10;" . |
---|
127 | "push \@subs,\$11;" . |
---|
128 | "push \@subs,\$12;" . |
---|
129 | "push \@subs,\$13;" . |
---|
130 | "push \@subs,\$14;" . |
---|
131 | "push \@subs,\$15;" . |
---|
132 | "push \@subs,\$16;" . |
---|
133 | "push \@subs,\$'; }"; |
---|
134 | |
---|
135 | if ($@) |
---|
136 | { |
---|
137 | printf $outfile "Error: $@\n"; |
---|
138 | next NEXT_RE; |
---|
139 | } |
---|
140 | elsif (scalar(@subs) == 0) |
---|
141 | { |
---|
142 | printf $outfile "No match\n"; |
---|
143 | } |
---|
144 | else |
---|
145 | { |
---|
146 | while (scalar(@subs) != 0) |
---|
147 | { |
---|
148 | printf $outfile (" 0: %s\n", &pchars($subs[0])); |
---|
149 | printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest; |
---|
150 | $last_printed = 0; |
---|
151 | for ($i = 1; $i <= 16; $i++) |
---|
152 | { |
---|
153 | if (defined $subs[$i]) |
---|
154 | { |
---|
155 | while ($last_printed++ < $i-1) |
---|
156 | { printf $outfile ("%2d: <unset>\n", $last_printed); } |
---|
157 | printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i])); |
---|
158 | $last_printed = $i; |
---|
159 | } |
---|
160 | } |
---|
161 | splice(@subs, 0, 18); |
---|
162 | } |
---|
163 | } |
---|
164 | } |
---|
165 | } |
---|
166 | |
---|
167 | printf $outfile "\n"; |
---|
168 | |
---|
169 | # End |
---|