source: trunk/third/perl/lib/File/Find.pm @ 14545

Revision 14545, 18.8 KB checked in by ghudson, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r14544, which included commits to RCS files with non-trunk default branches.
Line 
1package File::Find;
2use 5.005_64;
3require Exporter;
4require Cwd;
5
6=head1 NAME
7
8find - traverse a file tree
9
10finddepth - traverse a directory structure depth-first
11
12=head1 SYNOPSIS
13
14    use File::Find;
15    find(\&wanted, '/foo', '/bar');
16    sub wanted { ... }
17
18    use File::Find;
19    finddepth(\&wanted, '/foo', '/bar');
20    sub wanted { ... }
21
22    use File::Find;
23    find({ wanted => \&process, follow => 1 }, '.');
24
25=head1 DESCRIPTION
26
27The first argument to find() is either a hash reference describing the
28operations to be performed for each file, or a code reference.
29
30Here are the possible keys for the hash:
31
32=over 3
33
34=item C<wanted>
35
36The value should be a code reference.  This code reference is called
37I<the wanted() function> below.
38
39=item C<bydepth>
40
41Reports the name of a directory only AFTER all its entries
42have been reported.  Entry point finddepth() is a shortcut for
43specifying C<{ bydepth => 1 }> in the first argument of find().
44
45=item C<follow>
46
47Causes symbolic links to be followed. Since directory trees with symbolic
48links (followed) may contain files more than once and may even have
49cycles, a hash has to be built up with an entry for each file.
50This might be expensive both in space and time for a large
51directory tree. See I<follow_fast> and I<follow_skip> below.
52If either I<follow> or I<follow_fast> is in effect:
53
54=over 6
55
56=item *
57
58It is guarantueed that an I<lstat> has been called before the user's
59I<wanted()> function is called. This enables fast file checks involving S< _>.
60
61=item *
62
63There is a variable C<$File::Find::fullname> which holds the absolute
64pathname of the file with all symbolic links resolved
65
66=back
67
68=item C<follow_fast>
69
70This is similar to I<follow> except that it may report some files
71more than once. It does detect cycles however.
72Since only symbolic links have to be hashed, this is
73much cheaper both in space and time.
74If processing a file more than once (by the user's I<wanted()> function)
75is worse than just taking time, the option I<follow> should be used.
76
77=item C<follow_skip>
78
79C<follow_skip==1>, which is the default, causes all files which are
80neither directories nor symbolic links to be ignored if they are about
81to be processed a second time. If a directory or a symbolic link
82are about to be processed a second time, File::Find dies.
83C<follow_skip==0> causes File::Find to die if any file is about to be
84processed a second time.
85C<follow_skip==2> causes File::Find to ignore any duplicate files and
86dirctories but to proceed normally otherwise.
87
88
89=item C<no_chdir>
90
91Does not C<chdir()> to each directory as it recurses. The wanted()
92function will need to be aware of this, of course. In this case,
93C<$_> will be the same as C<$File::Find::name>.
94
95=item C<untaint>
96
97If find is used in taint-mode (-T command line switch or if EUID != UID
98or if EGID != GID) then internally directory names have to be untainted
99before they can be cd'ed to. Therefore they are checked against a regular
100expression I<untaint_pattern>. Note, that all names passed to the
101user's I<wanted()> function are still tainted.
102
103=item C<untaint_pattern>
104
105See above. This should be set using the C<qr> quoting operator.
106The default is set to  C<qr|^([-+@\w./]+)$|>.
107Note that the paranthesis which are vital.
108
109=item C<untaint_skip>
110
111If set, directories (subtrees) which fail the I<untaint_pattern>
112are skipped. The default is to 'die' in such a case.
113
114=back
115
116The wanted() function does whatever verifications you want.
117C<$File::Find::dir> contains the current directory name, and C<$_> the
118current filename within that directory.  C<$File::Find::name> contains
119the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when
120the function is called, unless C<no_chdir> was specified.
121When <follow> or <follow_fast> are in effect there is also a
122C<$File::Find::fullname>.
123The function may set C<$File::Find::prune> to prune the tree
124unless C<bydepth> was specified.
125Unless C<follow> or C<follow_fast> is specified, for compatibility
126reasons (find.pl, find2perl) there are in addition the following globals
127available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>,
128C<$File::Find::topmode> and C<$File::Find::topnlink>.
129
130This library is useful for the C<find2perl> tool, which when fed,
131
132    find2perl / -name .nfs\* -mtime +7 \
133        -exec rm -f {} \; -o -fstype nfs -prune
134
135produces something like:
136
137    sub wanted {
138        /^\.nfs.*\z/s &&
139        (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
140        int(-M _) > 7 &&
141        unlink($_)
142        ||
143        ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
144        $dev < 0 &&
145        ($File::Find::prune = 1);
146    }
147
148Set the variable C<$File::Find::dont_use_nlink> if you're using AFS,
149since AFS cheats.
150
151
152Here's another interesting wanted function.  It will find all symlinks
153that don't resolve:
154
155    sub wanted {
156         -l && !-e && print "bogus link: $File::Find::name\n";
157    }
158
159See also the script C<pfind> on CPAN for a nice application of this
160module.
161
162=head1 CAVEAT
163
164Be aware that the option to follow symblic links can be dangerous.
165Depending on the structure of the directory tree (including symbolic
166links to directories) you might traverse a given (physical) directory
167more than once (only if C<follow_fast> is in effect).
168Furthermore, deleting or changing files in a symbolically linked directory
169might cause very unpleasant surprises, since you delete or change files
170in an unknown directory.
171
172
173=cut
174
175@ISA = qw(Exporter);
176@EXPORT = qw(find finddepth);
177
178
179use strict;
180my $Is_VMS;
181
182require File::Basename;
183
184my %SLnkSeen;
185my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
186    $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat);
187
188sub contract_name {
189    my ($cdir,$fn) = @_;
190
191    return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.';
192
193    $cdir = substr($cdir,0,rindex($cdir,'/')+1);
194
195    $fn =~ s|^\./||;
196
197    my $abs_name= $cdir . $fn;
198
199    if (substr($fn,0,3) eq '../') {
200        do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|);
201    }
202
203    return $abs_name;
204}
205
206
207sub PathCombine($$) {
208    my ($Base,$Name) = @_;
209    my $AbsName;
210
211    if (substr($Name,0,1) eq '/') {
212        $AbsName= $Name;
213    }
214    else {
215        $AbsName= contract_name($Base,$Name);
216    }
217
218    # (simple) check for recursion
219    my $newlen= length($AbsName);
220    if ($newlen <= length($Base)) {
221        if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
222            && $AbsName eq substr($Base,0,$newlen))
223        {
224            return undef;
225        }
226    }
227    return $AbsName;
228}
229
230sub Follow_SymLink($) {
231    my ($AbsName) = @_;
232
233    my ($NewName,$DEV, $INO);
234    ($DEV, $INO)= lstat $AbsName;
235
236    while (-l _) {
237        if ($SLnkSeen{$DEV, $INO}++) {
238            if ($follow_skip < 2) {
239                die "$AbsName is encountered a second time";
240            }
241            else {
242                return undef;
243            }
244        }
245        $NewName= PathCombine($AbsName, readlink($AbsName));
246        unless(defined $NewName) {
247            if ($follow_skip < 2) {
248                die "$AbsName is a recursive symbolic link";
249            }
250            else {
251                return undef;
252            }
253        }
254        else {
255            $AbsName= $NewName;
256        }
257        ($DEV, $INO) = lstat($AbsName);
258        return undef unless defined $DEV;  #  dangling symbolic link
259    }
260
261    if ($full_check && $SLnkSeen{$DEV, $INO}++) {
262        if ($follow_skip < 1) {
263            die "$AbsName encountered a second time";
264        }
265        else {
266            return undef;
267        }
268    }
269
270    return $AbsName;
271}
272
273our($dir, $name, $fullname, $prune);
274sub _find_dir_symlnk($$$);
275sub _find_dir($$$);
276
277sub _find_opt {
278    my $wanted = shift;
279    die "invalid top directory" unless defined $_[0];
280
281    my $cwd           = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd();
282    my $cwd_untainted = $cwd;
283    $wanted_callback  = $wanted->{wanted};
284    $bydepth          = $wanted->{bydepth};
285    $no_chdir         = $wanted->{no_chdir};
286    $full_check       = $wanted->{follow};
287    $follow           = $full_check || $wanted->{follow_fast};
288    $follow_skip      = $wanted->{follow_skip};
289    $untaint          = $wanted->{untaint};
290    $untaint_pat      = $wanted->{untaint_pattern};
291    $untaint_skip     = $wanted->{untaint_skip};
292
293    # for compatability reasons (find.pl, find2perl)
294    our ($topdir, $topdev, $topino, $topmode, $topnlink);
295
296    # a symbolic link to a directory doesn't increase the link count
297    $avoid_nlink      = $follow || $File::Find::dont_use_nlink;
298   
299    if ( $untaint ) {
300        $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|;
301        die "insecure cwd in find(depth)"  unless defined($cwd_untainted);
302    }
303   
304    my ($abs_dir, $Is_Dir);
305
306    Proc_Top_Item:
307    foreach my $TOP (@_) {
308        my $top_item = $TOP;
309        $top_item =~ s|/\z|| unless $top_item eq '/';
310        $Is_Dir= 0;
311       
312        ($topdev,$topino,$topmode,$topnlink) = stat $top_item;
313
314        if ($follow) {
315            if (substr($top_item,0,1) eq '/') {
316                $abs_dir = $top_item;
317            }
318            elsif ($top_item eq '.') {
319                $abs_dir = $cwd;
320            }
321            else {  # care about any  ../
322                $abs_dir = contract_name("$cwd/",$top_item);
323            }
324            $abs_dir= Follow_SymLink($abs_dir);
325            unless (defined $abs_dir) {
326                warn "$top_item is a dangling symbolic link\n";
327                next Proc_Top_Item;
328            }
329            if (-d _) {
330                _find_dir_symlnk($wanted, $abs_dir, $top_item);
331                $Is_Dir= 1;
332            }
333        }
334        else { # no follow
335            $topdir = $top_item;
336            unless (defined $topnlink) {
337                warn "Can't stat $top_item: $!\n";
338                next Proc_Top_Item;
339            }
340            if (-d _) {
341                $top_item =~ s/\.dir\z// if $Is_VMS;
342                _find_dir($wanted, $top_item, $topnlink);
343                $Is_Dir= 1;
344            }
345            else {
346                $abs_dir= $top_item;
347            }
348        }
349
350        unless ($Is_Dir) {
351            unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
352                ($dir,$_) = ('./', $top_item);
353            }
354
355            $abs_dir = $dir;
356            if ($untaint) {
357                my $abs_dir_save = $abs_dir;
358                $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|;
359                unless (defined $abs_dir) {
360                    if ($untaint_skip == 0) {
361                        die "directory $abs_dir_save is still tainted";
362                    }
363                    else {
364                        next Proc_Top_Item;
365                    }
366                }
367            }
368
369            unless ($no_chdir or chdir $abs_dir) {
370                warn "Couldn't chdir $abs_dir: $!\n";
371                next Proc_Top_Item;
372            }
373
374            $name = $abs_dir . $_;
375
376            &$wanted_callback;
377
378        }
379
380        $no_chdir or chdir $cwd_untainted;
381    }
382}
383
384# API:
385#  $wanted
386#  $p_dir :  "parent directory"
387#  $nlink :  what came back from the stat
388# preconditions:
389#  chdir (if not no_chdir) to dir
390
391sub _find_dir($$$) {
392    my ($wanted, $p_dir, $nlink) = @_;
393    my ($CdLvl,$Level) = (0,0);
394    my @Stack;
395    my @filenames;
396    my ($subcount,$sub_nlink);
397    my $SE= [];
398    my $dir_name= $p_dir;
399    my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
400    my $dir_rel= '.';      # directory name relative to current directory
401
402    local ($dir, $name, $prune, *DIR);
403     
404    unless ($no_chdir or $p_dir eq '.') {
405        my $udir = $p_dir;
406        if ($untaint) {
407            $udir = $1 if $p_dir =~ m|$untaint_pat|;
408            unless (defined $udir) {
409                if ($untaint_skip == 0) {
410                    die "directory $p_dir is still tainted";
411                }
412                else {
413                    return;
414                }
415            }
416        }
417        unless (chdir $udir) {
418            warn "Can't cd to $udir: $!\n";
419            return;
420        }
421    }
422   
423    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
424
425    while (defined $SE) {
426        unless ($bydepth) {
427            $dir= $p_dir;
428            $name= $dir_name;
429            $_= ($no_chdir ? $dir_name : $dir_rel );
430            # prune may happen here
431            $prune= 0;
432            &$wanted_callback;
433            next if $prune;
434        }
435     
436        # change to that directory
437        unless ($no_chdir or $dir_rel eq '.') {
438            my $udir= $dir_rel;
439            if ($untaint) {
440                $udir = $1 if $dir_rel =~ m|$untaint_pat|;
441                unless (defined $udir) {
442                    if ($untaint_skip == 0) {
443                        die "directory ("
444                            . ($p_dir ne '/' ? $p_dir : '')
445                            . "/) $dir_rel is still tainted";
446                    }
447                }
448            }
449            unless (chdir $udir) {
450                warn "Can't cd to ("
451                    . ($p_dir ne '/' ? $p_dir : '')
452                    . "/) $udir : $!\n";
453                next;
454            }
455            $CdLvl++;
456        }
457
458        $dir= $dir_name;
459
460        # Get the list of files in the current directory.
461        unless (opendir DIR, ($no_chdir ? $dir_name : '.')) {
462            warn "Can't opendir($dir_name): $!\n";
463            next;
464        }
465        @filenames = readdir DIR;
466        closedir(DIR);
467
468        if ($nlink == 2 && !$avoid_nlink) {
469            # This dir has no subdirectories.
470            for my $FN (@filenames) {
471                next if $FN =~ /^\.{1,2}\z/;
472               
473                $name = $dir_pref . $FN;
474                $_ = ($no_chdir ? $name : $FN);
475                &$wanted_callback;
476            }
477
478        }
479        else {
480            # This dir has subdirectories.
481            $subcount = $nlink - 2;
482
483            for my $FN (@filenames) {
484                next if $FN =~ /^\.{1,2}\z/;
485                if ($subcount > 0 || $avoid_nlink) {
486                    # Seen all the subdirs?
487                    # check for directoriness.
488                    # stat is faster for a file in the current directory
489                    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
490
491                    if (-d _) {
492                        --$subcount;
493                        $FN =~ s/\.dir\z// if $Is_VMS;
494                        push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
495                    }
496                    else {
497                        $name = $dir_pref . $FN;
498                        $_= ($no_chdir ? $name : $FN);
499                        &$wanted_callback;
500                    }
501                }
502                else {
503                    $name = $dir_pref . $FN;
504                    $_= ($no_chdir ? $name : $FN);
505                    &$wanted_callback;
506                }
507            }
508        }
509    }
510    continue {
511        while ( defined ($SE = pop @Stack) ) {
512            ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
513            if ($CdLvl > $Level && !$no_chdir) {
514                my $tmp = join('/',('..') x ($CdLvl-$Level));
515                die "Can't cd to $dir_name" . $tmp
516                    unless chdir ($tmp);
517                $CdLvl = $Level;
518            }
519            $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
520            $dir_pref = "$dir_name/";
521            if ( $nlink < 0 ) {  # must be finddepth, report dirname now
522                $name = $dir_name;
523                if ( substr($name,-2) eq '/.' ) {
524                  $name =~ s|/\.$||;
525                }
526                $dir = $p_dir;
527                $_ = ($no_chdir ? $dir_name : $dir_rel );
528                if ( substr($_,-2) eq '/.' ) {
529                  s|/\.$||;
530                }
531                &$wanted_callback;
532            } else {
533                push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
534                last;
535            }
536        }
537    }
538}
539
540
541# API:
542#  $wanted
543#  $dir_loc : absolute location of a dir
544#  $p_dir   : "parent directory"
545# preconditions:
546#  chdir (if not no_chdir) to dir
547
548sub _find_dir_symlnk($$$) {
549    my ($wanted, $dir_loc, $p_dir) = @_;
550    my @Stack;
551    my @filenames;
552    my $new_loc;
553    my $pdir_loc = $dir_loc;
554    my $SE = [];
555    my $dir_name = $p_dir;
556    my $dir_pref = ( $p_dir   eq '/' ? '/' : "$p_dir/" );
557    my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
558    my $dir_rel = '.';          # directory name relative to current directory
559    my $byd_flag;               # flag for pending stack entry if $bydepth
560
561    local ($dir, $name, $fullname, $prune, *DIR);
562   
563    unless ($no_chdir or $p_dir eq '.') {
564        my $udir = $dir_loc;
565        if ($untaint) {
566            $udir = $1 if $dir_loc =~ m|$untaint_pat|;
567            unless (defined $udir) {
568                if ($untaint_skip == 0) {
569                    die "directory $dir_loc is still tainted";
570                }
571                else {
572                    return;
573                }
574            }
575        }
576        unless (chdir $udir) {
577            warn "Can't cd to $udir: $!\n";
578            return;
579        }
580    }
581
582    push @Stack,[$dir_loc,$pdir_loc,$p_dir,$dir_rel,-1]  if  $bydepth;
583
584    while (defined $SE) {
585
586        unless ($bydepth) {
587            $dir= $p_dir;
588            $name= $dir_name;
589            $_= ($no_chdir ? $dir_name : $dir_rel );
590            $fullname= $dir_loc;
591            # prune may happen here
592            $prune= 0;
593            &$wanted_callback;
594            next if  $prune;
595        }
596
597        # change to that directory
598        unless ($no_chdir or $dir_rel eq '.') {
599            my $udir = $dir_loc;
600            if ($untaint) {
601                $udir = $1 if $dir_loc =~ m|$untaint_pat|;
602                unless (defined $udir ) {
603                    if ($untaint_skip == 0) {
604                        die "directory $dir_loc is still tainted";
605                    }
606                    else {
607                        next;
608                    }
609                }
610            }
611            unless (chdir $udir) {
612                warn "Can't cd to $udir: $!\n";
613                next;
614            }
615        }
616
617        $dir = $dir_name;
618
619        # Get the list of files in the current directory.
620        unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) {
621            warn "Can't opendir($dir_loc): $!\n";
622            next;
623        }
624        @filenames = readdir DIR;
625        closedir(DIR);
626
627        for my $FN (@filenames) {
628            next if $FN =~ /^\.{1,2}\z/;
629
630            # follow symbolic links / do an lstat
631            $new_loc = Follow_SymLink($loc_pref.$FN);
632
633            # ignore if invalid symlink
634            next unless defined $new_loc;
635     
636            if (-d _) {
637                push @Stack,[$new_loc,$dir_loc,$dir_name,$FN,1];
638            }
639            else {
640                $fullname = $new_loc;
641                $name = $dir_pref . $FN;
642                $_ = ($no_chdir ? $name : $FN);
643                &$wanted_callback;
644            }
645        }
646
647    }
648    continue {
649        while (defined($SE = pop @Stack)) {
650            ($dir_loc, $pdir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
651            $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
652            $dir_pref = "$dir_name/";
653            $loc_pref = "$dir_loc/";
654            if ( $byd_flag < 0 ) {  # must be finddepth, report dirname now
655                unless ($no_chdir or $dir_rel eq '.') {
656                    my $udir = $pdir_loc;
657                    if ($untaint) {
658                        $udir = $1 if $dir_loc =~ m|$untaint_pat|;
659                    }
660                    unless (chdir $udir) {
661                        warn "Can't cd to $udir: $!\n";
662                        next;
663                    }
664                }
665                $fullname = $dir_loc;
666                $name = $dir_name;
667                if ( substr($name,-2) eq '/.' ) {
668                  $name =~ s|/\.$||;
669                }
670                $dir = $p_dir;
671                $_ = ($no_chdir ? $dir_name : $dir_rel);
672                if ( substr($_,-2) eq '/.' ) {
673                  s|/\.$||;
674                }
675
676                &$wanted_callback;
677            } else {
678                push @Stack,[$dir_loc, $pdir_loc, $p_dir, $dir_rel,-1]  if  $bydepth;
679                last;
680            }
681        }
682    }
683}
684
685
686sub wrap_wanted {
687    my $wanted = shift;
688    if ( ref($wanted) eq 'HASH' ) {
689        if ( $wanted->{follow} || $wanted->{follow_fast}) {
690            $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
691        }
692        if ( $wanted->{untaint} ) {
693            $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$| 
694                unless defined $wanted->{untaint_pattern};
695            $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
696        }
697        return $wanted;
698    }
699    else {
700        return { wanted => $wanted };
701    }
702}
703
704sub find {
705    my $wanted = shift;
706    _find_opt(wrap_wanted($wanted), @_);
707    %SLnkSeen= ();  # free memory
708}
709
710sub finddepth {
711    my $wanted = wrap_wanted(shift);
712    $wanted->{bydepth} = 1;
713    _find_opt($wanted, @_);
714    %SLnkSeen= ();  # free memory
715}
716
717# These are hard-coded for now, but may move to hint files.
718if ($^O eq 'VMS') {
719    $Is_VMS = 1;
720    $File::Find::dont_use_nlink = 1;
721}
722
723$File::Find::dont_use_nlink = 1
724    if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32';
725
726# Set dont_use_nlink in your hint file if your system's stat doesn't
727# report the number of links in a directory as an indication
728# of the number of files.
729# See, e.g. hints/machten.sh for MachTen 2.2.
730unless ($File::Find::dont_use_nlink) {
731    require Config;
732    $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
733}
734
7351;
Note: See TracBrowser for help on using the repository browser.