source: trunk/third/perl/installhtml @ 20075

Revision 20075, 18.1 KB checked in by zacheiss, 21 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r20074, which included commits to RCS files with non-trunk default branches.
  • Property svn:executable set to *
Line 
1#!./perl -Ilib -w
2
3# This file should really be extracted from a .PL file
4
5use strict;
6use Config;             # for config options in the makefile
7use Getopt::Long;       # for command-line parsing
8use Cwd;
9use Pod::Html 'anchorify';
10
11=head1 NAME
12
13installhtml - converts a collection of POD pages to HTML format.
14
15=head1 SYNOPSIS
16
17    installhtml  [--help] [--podpath=<name>:...:<name>] [--podroot=<name>]
18         [--htmldir=<name>] [--htmlroot=<name>]  [--norecurse] [--recurse]
19         [--splithead=<name>,...,<name>]   [--splititem=<name>,...,<name>]
20         [--libpods=<name>,...,<name>]  [--verbose]
21
22=head1 DESCRIPTION
23
24I<installhtml> converts a collection of POD pages to a corresponding
25collection of HTML pages.  This is primarily used to convert the pod
26pages found in the perl distribution.
27
28=head1 OPTIONS
29
30=over 4
31
32=item B<--help> help
33
34Displays the usage.
35
36=item B<--podroot> POD search path base directory
37
38The base directory to search for all .pod and .pm files to be converted.
39Default is current directory.
40
41=item B<--podpath> POD search path
42
43The list of directories to search for .pod and .pm files to be converted.
44Default is `podroot/.'.
45
46=item B<--recurse> recurse on subdirectories
47
48Whether or not to convert all .pm and .pod files found in subdirectories
49too.  Default is to not recurse.
50
51=item B<--htmldir> HTML destination directory
52
53The base directory which all HTML files will be written to.  This should
54be a path relative to the filesystem, not the resulting URL.
55
56=item B<--htmlroot> URL base directory
57
58The base directory which all resulting HTML files will be visible at in
59a URL.  The default is `/'.
60
61=item B<--splithead> POD files to split on =head directive
62
63Comma-separated list of pod files to split by the =head directive.  The
64.pod suffix is optional. These files should have names specified
65relative to podroot.
66
67=item B<--splititem> POD files to split on =item directive
68
69Comma-separated list of all pod files to split by the =item directive.
70The .pod suffix is optional.  I<installhtml> does not do the actual
71split, rather it invokes I<splitpod> to do the dirty work.  As with
72--splithead, these files should have names specified relative to podroot.
73
74=item B<--splitpod> Directory containing the splitpod program
75
76The directory containing the splitpod program. The default is `podroot/pod'.
77
78=item B<--libpods> library PODs for LE<lt>E<gt> links
79
80Comma-separated list of "library" pod files.  This is the same list that
81will be passed to pod2html when any pod is converted.
82
83=item B<--verbose> verbose output
84
85Self-explanatory.
86
87=back
88
89=head1 EXAMPLE
90
91The following command-line is an example of the one we use to convert
92perl documentation:
93
94    ./installhtml --podpath=lib:ext:pod:vms   \
95                        --podroot=/usr/src/perl     \
96                        --htmldir=/perl/nmanual     \
97                        --htmlroot=/perl/nmanual    \
98                        --splithead=pod/perlipc     \
99                        --splititem=pod/perlfunc    \
100                        --libpods=perlfunc,perlguts,perlvar,perlrun,perlop \
101                        --recurse \
102                        --verbose
103
104=head1 AUTHOR
105
106Chris Hall E<lt>hallc@cs.colorado.eduE<gt>
107
108=head1 TODO
109
110=cut
111
# Usage text printed by usage().  The synopsis at the top lists every
# option the script accepts, including --splitpod, which is described in
# the option table further down.
my $usage = <<END_OF_USAGE;
Usage: $0 --help --podpath=<name>:...:<name> --podroot=<name>
         --htmldir=<name> --htmlroot=<name> --norecurse --recurse
         --splithead=<name>,...,<name> --splititem=<name>,...,<name>
         --libpods=<name>,...,<name> --splitpod=<name> --verbose

    --help      - this message
    --podpath   - colon-separated list of directories containing .pod and
                  .pm files to be converted (. by default).
    --podroot   - filesystem base directory from which all relative paths in
                  podpath stem (default is .).
    --htmldir   - directory to store resulting html files in relative
                  to the filesystem (\$podroot/html by default).
    --htmlroot  - http-server base directory from which all relative paths
                  in podpath stem (default is /).
    --libpods   - comma-separated list of files to search for =item pod
                  directives in as targets of C<> and implicit links (empty
                  by default).
    --norecurse - don't recurse on those subdirectories listed in podpath.
                  (default behavior).
    --recurse   - recurse on those subdirectories listed in podpath
    --splithead - comma-separated list of .pod or .pm files to split.  will
                  split each file into several smaller files at every occurrence
                  of a pod =head[1-6] directive.
    --splititem - comma-separated list of .pod or .pm files to split using
                  splitpod.
    --splitpod  - directory where the program splitpod can be found
                  (\$podroot/pod by default).
    --verbose   - self-explanatory.

END_OF_USAGE
145
# Conversion settings.  Each one starts at its built-in default and may be
# overridden from the command line by parse_command_line().
my @libpods   = ();                 # library pods handed on to pod2html
my @podpath   = ( "." );            # directories containing the .pod and
                                    # .pm files to be converted
my $podroot   = ".";                # assume the pods we want are here
my $htmldir   = "";                 # empty until option parsing is done
my $htmlroot  = "/";                # default URL base
my $recurse   = 0;                  # do not recurse by default
my @splithead = ();                 # don't split any files by default
my @splititem = ();                 # don't split any files by default
my $splitpod  = "";                 # empty until option parsing is done

my $verbose   = 0;                  # whether or not to print debugging info

my $pod2html  = "pod/pod2html";     # name reported when a conversion fails
163
# With no arguments at all there is nothing to do: print the usage text.
usage("") unless @ARGV;

# Overcome shell's p1,..,p8 limitation.
# See vms/descrip_mms.template -> descrip.mms for invocation.
if ( $^O eq 'VMS' ) { @ARGV = split(/\s+/,$ARGV[0]); }

use vars qw( %Options );

# parse the command-line
my $result = GetOptions( \%Options, qw(
        help
        podpath=s
        podroot=s
        htmldir=s
        htmlroot=s
        libpods=s
        recurse!
        splithead=s
        splititem=s
        splitpod=s
        verbose
));
usage("invalid parameters") unless $result;
parse_command_line();


# set these variables to appropriate values if the user didn't specify
#  values for them.  $htmldir is a filesystem path, so its default hangs
#  off $podroot (as the usage text documents) and not off $htmlroot,
#  which is the base of the resulting URLs.
$htmldir  = "$podroot/html" unless $htmldir;
$splitpod = "$podroot/pod"  unless $splitpod;


# make sure that the destination directory exists
if (! -d $htmldir) {
    mkdir($htmldir, 0755) ||
        die "$0: cannot make directory $htmldir: $!\n";
}
199
200
# Files listed in @ignore are left out of the conversion pass: they have
# already been processed by splititem or splithead and must not be
# converted a second time.  Files in the directories listed in
# @splitdirs won't get an index.
my @ignore    = ();
my @splitdirs = ();

# Split pods first.  It's important to do this before converting ANY pods
# because it may affect some of the links.
split_on_head($podroot, $htmldir, \@splitdirs, \@ignore, @splithead);
split_on_item($podroot,           \@splitdirs, \@ignore, @splititem);


# convert the pod pages found in each directory of @podpath
for my $poddir (@podpath) {
    installdir($poddir, $recurse, $podroot, \@splitdirs, \@ignore);
}


# now go through and create a master index for each pod split by =item
for my $item (@splititem) {
    my $index = "$htmldir/$item.html";
    print "creating index $index\n" if $verbose;
    create_index($index, "$htmldir/$item");
}
228
# For each pod that was split by =head: let pod2html convert the complete
# file, then cut the result down to its index and point every index entry
# at the page generated for the corresponding section.
foreach my $dir (@splithead) {
    # name relative to podroot without suffix, and its last path component.
    # The suffix is stripped before $pod is derived so that the links match
    # the directory created by splitpod() even when the name was given
    # with an explicit .pod suffix.
    (my $base = $dir)  =~ s/\.(?:pod|pm)$//;
    (my $pod  = $base) =~ s,^.*/,,;
    my $source = ($dir =~ /(\.pod|\.pm)$/) ? $dir : "$dir.pod";

    # let pod2html create the file
    runpod2html($source, 1);

    # now go through and truncate after the index
    my $file = "$htmldir/$base";
    print "creating index $file.html\n" if $verbose;

    # read in everything until what would have been the first =head
    # directive, patching the index as we go.
    local $/ = "";              # paragraph mode, for this iteration only
    open(my $in, "<", "$file.html") ||
        die "$0: error opening $file.html for input: $!\n";
    my @data = ();
    while (my $para = <$in>) {
        last if $para =~ /name="name"/i;
        # [^"]* keeps each match inside one attribute value, so several
        # links on the same line are rewritten independently
        $para =~ s{href="#([^"]*)">}{
            my $url = "$pod/$1.html" ;
            $url = Pod::Html::relativize_url( $url, "$file.html" )
                if ( ! defined $Options{htmlroot} || $Options{htmlroot} eq '' );
            "href=\"$url\">" ;
        }egi;
        push @data, $para;
    }
    close($in);

    # now rewrite the file
    open(my $out, ">", "$file.html") ||
        die "$0: error opening $file.html for output: $!\n";
    print $out "@data", "\n";
    close($out) ||
        die "$0: error closing $file.html: $!\n";
}
264
265##############################################################################
266
267
#
# usage - reports its arguments, if any, as a warning prefixed with the
#  program name, then dies with the usage text.
#
sub usage {
    my @complaint = @_;
    if (@complaint) {
        warn "$0: @complaint\n";
    }
    die $usage;
}
272
273
#
# parse_command_line - copies the values collected in %Options into the
#  script's configuration variables.  Options that were not given leave
#  the corresponding variable untouched.  --help prints the usage instead.
#
sub parse_command_line {
    usage() if defined $Options{help};
    $Options{help} = "";                    # make -w shut up

    # list-valued options: separator and destination array
    my %list_option = (
        podpath   => [ ":", \@podpath   ],  # list of directories
        splithead => [ ",", \@splithead ],  # lists of files
        splititem => [ ",", \@splititem ],
        libpods   => [ ",", \@libpods   ],
    );
    foreach my $name (keys %list_option) {
        next unless defined $Options{$name};
        my ($separator, $target) = @{ $list_option{$name} };
        @$target = split($separator, $Options{$name});
    }

    # single-valued options and flags: destination scalar
    my %scalar_option = (
        htmldir  => \$htmldir,
        htmlroot => \$htmlroot,
        podroot  => \$podroot,
        splitpod => \$splitpod,
        recurse  => \$recurse,
        verbose  => \$verbose,
    );
    foreach my $name (keys %scalar_option) {
        next unless defined $Options{$name};
        ${ $scalar_option{$name} } = $Options{$name};
    }
}
294
295
#
# absolute_path - returns $path unchanged when it already begins with a
#  slash; any other path is taken to be relative to $cwd and comes back
#  as "$cwd/$path".
#
sub absolute_path {
    my($cwd, $path) = @_;
    return ($path =~ m:^/:) ? $path : "$cwd/$path";
}
303
304
#
# create_index - writes the master index $html for the split pod whose
#  pieces were converted into the directory $dir.  The index is a <DL>
#  list with one entry per .htm/.html file in $dir, in sorted order,
#  built from the NAME paragraph of that file.  Files without a
#  recognizable NAME paragraph are left out.
#
#  Dies if $dir or one of its files cannot be read, or if $html cannot
#  be written.
#
sub create_index {
    my($html, $dir) = @_;
    (my $pod = $dir) =~ s,^.*/,,;

    # the files are read a paragraph at a time; the caller's setting of
    # $/ is restored on return
    local $/ = "";

    # get the list of .html files in this directory
    opendir(my $dh, $dir) ||
        die "$0: error opening directory $dir for reading: $!\n";
    my @files = sort(grep(/\.html?$/, readdir($dh)));
    closedir($dh);

    open(my $out, ">", $html) ||
        die "$0: error opening $html for output: $!\n";

    # for each .html file in the directory, extract the name and the
    #   description from its NAME section and add them to the big index.
    print $out "<DL COMPACT>\n";
    foreach my $file (@files) {
        open(my $in, "<", "$dir/$file") ||
            die "$0: error opening $dir/$file for input: $!\n";
        my @filedata = <$in>;
        close($in);

        # pull out the NAME section; a file without one has no entry
        my ($name) = grep(/name="name"/i, @filedata);
        next unless defined $name;

        my ($lcp1, $lcp2) = ($name =~ m,/H1>\s(\S+)\s[\s-]*(.*?)\s*$,smi);
        if (defined $lcp1 and $lcp1 =~ m,^<P>$,i) { # Uninteresting.  Try again.
            ($lcp1, $lcp2) = ($name =~ m,/H1>\s<P>\s*(\S+)\s[\s-]*(.*?)\s*$,smi);
        }
        next unless defined $lcp1;

        # links are made relative to the index unless --htmlroot was given
        my $url = "$pod/$file";
        if ( ! defined $Options{htmlroot} || $Options{htmlroot} eq '' ) {
            $url = Pod::Html::relativize_url( "$pod/$file", $html );
        }

        print $out qq(<DT><A HREF="$url">);
        print $out "$lcp1</A></DT><DD>$lcp2</DD>\n";
    }
    print $out "</DL>\n";

    close($out) ||
        die "$0: error closing $html: $!\n";
}
363
364
#
# split_on_head - hands each pod named in @splithead (relative to
#  $podroot, .pod suffix optional) to splitpod(), which splits it on its
#  =head[1-6] directives.  The unsplit file is added to @$ignore since a
#  file that is split shouldn't be converted as a whole.
#
sub split_on_head {
    my($podroot, $htmldir, $splitdirs, $ignore, @splithead) = @_;

    print "splitting files by head.\n" if $verbose && @splithead;
    for my $name (@splithead) {
        # a name without any slash gets one so that it has an (empty)
        # directory part
        $name =~ s,^([^/]*)$,/$1,;
        my ($dirname, $stem) = ($name =~ m,(.*)/(.*?)(\.pod)?$,);
        my $source = "$podroot/$dirname/$stem.pod";

        push(@$ignore, $source);
        splitpod($source, "$podroot/$dirname", $htmldir, $splitdirs);
    }
}
386
387
#
# split_on_item - splits each pod named in @splititem (relative to
#  $podroot, .pod suffix optional) on its =item directives by running the
#  external splitpod program inside a directory named after the pod.
#
#  $splitdirs - array ref; every directory of pieces is appended to it
#  $ignore    - array ref; every unsplit pod is appended to it, since a
#               file that is split shouldn't be converted as a whole
#
#  Dies if a directory cannot be created or entered, or if the splitpod
#  program is missing; a failing splitpod run only produces a warning.
#
sub split_on_item {
    my($podroot, $splitdirs, $ignore, @splititem) = @_;

    print "splitting files by item.\n" if $verbose && @splititem;
    my $pwd = getcwd();
    my $splitter = absolute_path($pwd, "$splitpod/splitpod");
    my $perl = absolute_path($pwd, $^X);
    foreach my $pod (@splititem) {
        # figure out the directory to split into
        $pod =~ s,^([^/]*)$,/$1,;
        my ($parent, $stem) = ($pod =~ m,(.*)/(.*?)(\.pod)?$,);
        my $dirname  = "$parent/$stem";
        my $filename = "$stem.pod";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, "$podroot/$dirname.pod");

        # split the pod
        push(@$splitdirs, "$podroot/$dirname");
        if (! -d "$podroot/$dirname") {
            mkdir("$podroot/$dirname", 0755) ||
                    die "$0: error creating directory $podroot/$dirname: $!\n";
        }
        chdir("$podroot/$dirname") ||
            die "$0: error changing to directory $podroot/$dirname: $!\n";
        die "$splitter not found. Use '-splitpod dir' option.\n"
            unless -f $splitter;
        system($perl, $splitter, "../$filename") &&
            warn "$0: error running '$splitter ../$filename'"
                 ." from $podroot/$dirname";

        # go back before looking at the next pod: with a relative
        # $podroot the paths above only resolve from the starting directory
        chdir($pwd) ||
            die "$0: error changing to directory $pwd: $!\n";
    }
}
422
423
#
# splitpod - splits a .pod file into several smaller .pod files
#  where a new file is started each time a =head[1-6] pod directive
#  is encountered in the input file.
#
#  $pod       - path of the .pod file to split
#  $poddir    - directory in which the directory of pieces is created;
#               that directory is named after $pod without its .pod suffix
#  $htmldir   - accepted but not used by this routine
#  $splitdirs - array ref; the directory of pieces is appended to it
#
#  Anything in front of the first =head directive is dropped.  Every
#  piece is named after its heading and ends with a list of links to the
#  previous piece, the next piece and the pod as a whole.  A file without
#  any =head directive yields an empty directory.
#
#  Dies if $pod cannot be read or the directory or a piece cannot be
#  written.
#
sub splitpod {
    my($pod, $poddir, $htmldir, $splitdirs) = @_;

    print "splitting $pod\n" if $verbose;

    # read the file in paragraphs, leaving the caller's $/ alone
    my @filedata;
    {
        local $/ = "";
        open(my $in, "<", $pod) ||
            die "$0: error opening $pod for input: $!\n";
        @filedata = <$in>;
        close($in) ||
            die "$0: error closing $pod: $!\n";
    }

    # regroup the paragraphs by =head[1-6] sections
    my @poddata = ();
    foreach my $para (@filedata) {
        push(@poddata, "") if $para =~ /^\s*=head[1-6]/;
        $poddata[-1] .= "\n$para" if @poddata;
    }

    # collect the heading of every section, and the set of their anchors
    #  so that we can rewrite L<> links as necessary.  A directive that
    #  carries no heading text gives an empty title.
    my @titles = ();
    my %heads  = ();
    foreach my $section (@poddata) {
        my ($title) = ($section =~ /^\s*=head[1-6]\s+(.*)/);
        if (defined $title) {
            $heads{anchorify($title)} = 1;
        } else {
            $title = "";
        }
        push(@titles, $title);
    }

    # create a directory of a similar name and store all the
    #  files in there
    (my $dir = $pod) =~ s,.*/,,;    # get the last part of the name
    $dir =~ s/\.pod$//;             # only the suffix, not any ".pod" inside
    push(@$splitdirs, "$poddir/$dir");
    unless (-d "$poddir/$dir") {
        mkdir("$poddir/$dir", 0755) ||
            die "$0: could not create directory $poddir/$dir: $!\n";
    }

    # for each section of the file create a separate pod file
    foreach my $i (0 .. $#poddata) {
        # determine the "prev" and "next" links
        my $section = $titles[$i];
        my $prevsec = ($i > 0)         ? $titles[$i - 1] : "";
        my $nextsec = ($i < $#poddata) ? $titles[$i + 1] : "";

        # determine an appropriate filename (this must correspond with
        #  what pod2html will try and guess)
        my $file = "$dir/" . anchorify($section) . ".pod";

        # create the new .pod file
        print "\tcreating $poddir/$file\n" if $verbose;
        open(my $out, ">", "$poddir/$file") ||
            die "$0: error opening $poddir/$file for output: $!\n";

        # links to headings of this pod now have to name the directory
        (my $text = $poddata[$i]) =~ s,L<([^<>]*)>,
                        defined $heads{anchorify($1)} ? "L<$dir/$1>" : "L<$1>"
                     ,ge;
        print $out $text."\n\n";
        print $out "=over 4\n\n";
        print $out "=item *\n\nBack to L<$dir/\"$prevsec\">\n\n" if $prevsec;
        print $out "=item *\n\nForward to L<$dir/\"$nextsec\">\n\n" if $nextsec;
        print $out "=item *\n\nUp to L<$dir>\n\n";
        print $out "=back\n\n";
        close($out) ||
            die "$0: error closing $poddir/$file: $!\n";
    }
}
509
510
#
# installdir - takes care of converting the .pod and .pm files in the
#  directory $podroot/$dir to .html files and then installing those.
#
#  $recurse   - when true, subdirectories whose names do not begin with
#               a dot are processed too, before the files of $dir itself
#  $splitdirs - array ref; files in a directory listed here are converted
#               without an index
#  $ignore    - array ref of files to skip.  When both a .pod and a .pm
#               of the same name exist the .pod is used, and the .pm is
#               added to this list.
#
#  Dies if the directory cannot be read.
#
sub installdir {
    my($dir, $recurse, $podroot, $splitdirs, $ignore) = @_;

    # should files in this directory get an index?
    my $doindex = (grep($_ eq "$podroot/$dir", @$splitdirs) ? 0 : 1);

    # read the directory once; the three lists below are built from it
    opendir(my $dh, "$podroot/$dir")
        || die "$0: error opening directory $podroot/$dir: $!\n";
    my @entries = readdir($dh);
    closedir($dh);

    my @dirlist = ();   # directories to recurse on
    my @podlist = ();   # .pod files to install
    my @pmlist  = ();   # .pm files to install

    foreach my $entry (@entries) {
        if (-d "$podroot/$dir/$entry") {
            next unless $recurse;
            next if $entry =~ /^\.{1,2}/;
            # directory entries carry a .dir suffix on VMS
            my $name = $entry;
            $name = $1 if $^O eq 'VMS' && $entry =~ /^(.*)\.dir$/i;
            push(@dirlist, "$dir/$name");
        }
        elsif ($entry =~ /^(.*)\.pod$/) {
            push(@podlist, "$dir/$1");
        }
        elsif ($entry =~ /^(.*)\.pm$/) {
            push(@pmlist, "$dir/$1");
        }
    }

    # recurse on all subdirectories we kept track of
    foreach my $subdir (@dirlist) {
        installdir($subdir, $recurse, $podroot, $splitdirs, $ignore);
    }

    # install all the pods we found
    foreach my $pod (@podlist) {
        # check if we should ignore it.
        next if grep($_ eq "$podroot/$pod.pod", @$ignore);

        # check if a .pm files exists too
        if (grep($_ eq $pod, @pmlist)) {
            print  "$0: Warning both `$podroot/$pod.pod' and "
                . "`$podroot/$pod.pm' exist, using pod\n";
            push(@$ignore, "$pod.pm");
        }
        runpod2html("$pod.pod", $doindex);
    }

    # install all the .pm files we found
    foreach my $pm (@pmlist) {
        # check if we should ignore it.
        next if grep($_ eq "$pm.pm", @$ignore);

        runpod2html("$pm.pm", $doindex);
    }
}
572
573
#
# runpod2html - invokes pod2html to convert a .pod or .pm file to a .html
#  file.
#
#  $pod     - file to convert, relative to $podroot; the result is written
#             to the same relative name under $htmldir, with the suffix
#             replaced by .html
#  $doindex - whether pod2html is asked to generate an index
#
#  Dies if a destination directory cannot be created or if $? is set
#  once pod2html returns.
#
sub runpod2html {
    my($pod, $doindex) = @_;

    (my $html = $pod) =~ s/\.(pod|pm)$/.html/g;

    # make sure the destination directories exist
    my @dirs = split("/", $html);
    pop(@dirs);                         # the last component is the file
    my $dir = "$htmldir/";
    foreach my $component (@dirs) {
        if (! -d "$dir$component") {
            mkdir("$dir$component", 0755) ||
                die "$0: error creating directory $dir$component: $!\n";
        }
        $dir .= "$component/";
    }

    # invoke pod2html
    print "$podroot/$pod => $htmldir/$html\n" if $verbose;

    # forget the status of any earlier child process (such as a failed
    # splitpod run) so that the check below only sees this conversion
    $? = 0;

    Pod::Html::pod2html(
        "--htmldir=$htmldir",
        "--htmlroot=$htmlroot",
        "--podpath=".join(":", @podpath),
        "--podroot=$podroot", "--netscape",
        "--header",
        ($doindex ? "--index" : "--noindex"),
        "--" . ($recurse ? "" : "no") . "recurse",
        # leave the option out entirely when there are no library pods:
        # an empty argument would end option parsing early where options
        # are required to come first
        (@libpods ? "--libpods=" . join(":", @libpods) : ()),
        "--infile=$podroot/$pod", "--outfile=$htmldir/$html");
    die "$0: error running $pod2html: $!\n" if $?;
}
Note: See TracBrowser for help on using the repository browser.