source: trunk/third/pcre/perltest8 @ 19309

Revision 19309, 4.7 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r19308, which included commits to RCS files with non-trunk default branches.
  • Property svn:executable set to *
Line 
1#! /usr/bin/perl
2
3# Program for testing regular expressions with perl to check that PCRE handles
4# them the same. This is the version that supports /8 for UTF-8 testing. It
5# requires at least Perl 5.6.
6
7
8# Function for turning a string into a string of printing chars. There are
9# currently problems with UTF-8 strings; this fudges round them.
10
11sub pchars {
12my($t) = "";
13
14if ($utf8)
15  {
16  use utf8;
17  @p = unpack('U*', $_[0]);
18  foreach $c (@p)
19    {
20    if ($c >= 32 && $c < 127) { $t .= chr $c; }
21      else { $t .= sprintf("\\x{%02x}", $c); }
22    }
23  }
24
25else
26  {
27  foreach $c (split(//, $_[0]))
28    {
29    if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
30      else { $t .= sprintf("\\x%02x", ord $c); }
31    }
32  }
33
34$t;
35}
36
37
38
39# Read lines from named file or stdin and write to named file or stdout; lines
40# consist of a regular expression, in delimiters and optionally followed by
41# options, followed by a set of test data, terminated by an empty line.
42
43# Sort out the input and output files
44
45if (@ARGV > 0)
46  {
47  open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
48  $infile = "INFILE";
49  }
50else { $infile = "STDIN"; }
51
52if (@ARGV > 1)
53  {
54  open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
55  $outfile = "OUTFILE";
56  }
57else { $outfile = "STDOUT"; }
58
59printf($outfile "Perl $] Regular Expressions\n\n");
60
61# Main loop
62
63NEXT_RE:
64for (;;)
65  {
66  printf "  re> " if $infile eq "STDIN";
67  last if ! ($_ = <$infile>);
68  printf $outfile "$_" if $infile ne "STDIN";
69  next if ($_ eq "");
70
71  $pattern = $_;
72
73  while ($pattern !~ /^\s*(.).*\1/s)
74    {
75    printf "    > " if $infile eq "STDIN";
76    last if ! ($_ = <$infile>);
77    printf $outfile "$_" if $infile ne "STDIN";
78    $pattern .= $_;
79    }
80
81   chomp($pattern);
82   $pattern =~ s/\s+$//;
83
84  # The private /+ modifier means "print $' afterwards".
85
86  $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
87
88  # The private /8 modifier means "operate in UTF-8". Currently, Perl
89  # has bugs that we try to work around using this flag.
90
91  $utf8 = ($pattern =~ s/8(?=[a-z]*$)//);
92
93  # Check that the pattern is valid
94
95  if ($utf8)
96    {
97    use utf8;
98    eval "\$_ =~ ${pattern}";
99    }
100  else
101    {
102    eval "\$_ =~ ${pattern}";
103    }
104
105  if ($@)
106    {
107    printf $outfile "Error: $@";
108    next NEXT_RE;
109    }
110
111  # If the /g modifier is present, we want to put a loop round the matching;
112  # otherwise just a single "if".
113
114  $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
115
116  # If the pattern is actually the null string, Perl uses the most recently
117  # executed (and successfully compiled) regex is used instead. This is a
118  # nasty trap for the unwary! The PCRE test suite does contain null strings
119  # in places - if they are allowed through here all sorts of weird and
120  # unexpected effects happen. To avoid this, we replace such patterns with
121  # a non-null pattern that has the same effect.
122
123  $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
124
125  # Read data lines and test them
126
127  for (;;)
128    {
129    printf "data> " if $infile eq "STDIN";
130    last NEXT_RE if ! ($_ = <$infile>);
131    chomp;
132    printf $outfile "$_\n" if $infile ne "STDIN";
133
134    s/\s+$//;
135    s/^\s+//;
136
137    last if ($_ eq "");
138
139    $x = eval "\"$_\"";   # To get escapes processed
140
141    # Empty array for holding results, then do the matching.
142
143    @subs = ();
144
145    $pushes = "push \@subs,\$&;" .
146         "push \@subs,\$1;" .
147         "push \@subs,\$2;" .
148         "push \@subs,\$3;" .
149         "push \@subs,\$4;" .
150         "push \@subs,\$5;" .
151         "push \@subs,\$6;" .
152         "push \@subs,\$7;" .
153         "push \@subs,\$8;" .
154         "push \@subs,\$9;" .
155         "push \@subs,\$10;" .
156         "push \@subs,\$11;" .
157         "push \@subs,\$12;" .
158         "push \@subs,\$13;" .
159         "push \@subs,\$14;" .
160         "push \@subs,\$15;" .
161         "push \@subs,\$16;" .
162         "push \@subs,\$'; }";
163
164    if ($utf8)
165      {
166      use utf8;
167      eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
168      }
169    else
170      {
171      eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
172      }
173
174    if ($@)
175      {
176      printf $outfile "Error: $@\n";
177      next NEXT_RE;
178      }
179    elsif (scalar(@subs) == 0)
180      {
181      printf $outfile "No match\n";
182      }
183    else
184      {
185      while (scalar(@subs) != 0)
186        {
187        printf $outfile (" 0: %s\n", &pchars($subs[0]));
188        printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
189        $last_printed = 0;
190        for ($i = 1; $i <= 16; $i++)
191          {
192          if (defined $subs[$i])
193            {
194            while ($last_printed++ < $i-1)
195              { printf $outfile ("%2d: <unset>\n", $last_printed); }
196            printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
197            $last_printed = $i;
198            }
199          }
200        splice(@subs, 0, 18);
201        }
202      }
203    }
204  }
205
206printf $outfile "\n";
207
208# End
Note: See TracBrowser for help on using the repository browser.