source: trunk/third/perl/installhtml @ 14545

Revision 14545, 17.8 KB checked in by ghudson, 25 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r14544, which included commits to RCS files with non-trunk default branches.
  • Property svn:executable set to *
Line 
1#!./perl -w
2
# This file should really be extracted from a .PL
4
5use lib 'lib';          # use source library if present
6
7use Config;             # for config options in the makefile
8use Getopt::Long;       # for command-line parsing
9use Cwd;
10use Pod::Html;
11
12=head1 NAME
13
14installhtml - converts a collection of POD pages to HTML format.
15
16=head1 SYNOPSIS
17
    installhtml  [--help] [--podpath=<name>:...:<name>] [--podroot=<name>]
         [--htmldir=<name>] [--htmlroot=<name>]  [--norecurse] [--recurse]
         [--splithead=<name>,...,<name>]   [--splititem=<name>,...,<name>]
         [--splitpod=<name>]  [--libpods=<name>,...,<name>]  [--verbose]
22
23=head1 DESCRIPTION
24
25I<installhtml> converts a collection of POD pages to a corresponding
26collection of HTML pages.  This is primarily used to convert the pod
27pages found in the perl distribution.
28
29=head1 OPTIONS
30
31=over 4
32
33=item B<--help> help
34
35Displays the usage.
36
37=item B<--podroot> POD search path base directory
38
39The base directory to search for all .pod and .pm files to be converted.
40Default is current directory.
41
42=item B<--podpath> POD search path
43
44The list of directories to search for .pod and .pm files to be converted.
45Default is `podroot/.'.
46
47=item B<--recurse> recurse on subdirectories
48
49Whether or not to convert all .pm and .pod files found in subdirectories
50too.  Default is to not recurse.
51
52=item B<--htmldir> HTML destination directory
53
54The base directory which all HTML files will be written to.  This should
55be a path relative to the filesystem, not the resulting URL.
56
57=item B<--htmlroot> URL base directory
58
59The base directory which all resulting HTML files will be visible at in
60a URL.  The default is `/'.
61
62=item B<--splithead> POD files to split on =head directive
63
64Comma-separated list of pod files to split by the =head directive.  The
65.pod suffix is optional. These files should have names specified
66relative to podroot.
67
68=item B<--splititem> POD files to split on =item directive
69
70Comma-separated list of all pod files to split by the =item directive.
71The .pod suffix is optional.  I<installhtml> does not do the actual
72split, rather it invokes I<splitpod> to do the dirty work.  As with
73--splithead, these files should have names specified relative to podroot.
74
75=item B<--splitpod> Directory containing the splitpod program
76
77The directory containing the splitpod program. The default is `podroot/pod'.
78
79=item B<--libpods> library PODs for LE<lt>E<gt> links
80
81Comma-separated list of "library" pod files.  This is the same list that
82will be passed to pod2html when any pod is converted.
83
84=item B<--verbose> verbose output
85
86Self-explanatory.
87
88=back
89
90=head1 EXAMPLE
91
92The following command-line is an example of the one we use to convert
93perl documentation:
94
95    ./installhtml --podpath=lib:ext:pod:vms   \
96                        --podroot=/usr/src/perl     \
97                        --htmldir=/perl/nmanual     \
98                        --htmlroot=/perl/nmanual    \
99                        --splithead=pod/perlipc     \
100                        --splititem=pod/perlfunc    \
101                        --libpods=perlfunc,perlguts,perlvar,perlrun,perlop \
102                        --recurse \
103                        --verbose
104
105=head1 AUTHOR
106
107Chris Hall E<lt>hallc@cs.colorado.eduE<gt>
108
109=head1 TODO
110
111=cut
112
# Text printed by usage().  $0 is interpolated when this here-document is
# evaluated; the escaped \$podroot stays a literal "$podroot" in the output.
# The synopsis lists every option accepted by the GetOptions() call,
# including --splitpod.
$usage =<<END_OF_USAGE;
Usage: $0 --help --podpath=<name>:...:<name> --podroot=<name>
         --htmldir=<name> --htmlroot=<name> --norecurse --recurse
         --splithead=<name>,...,<name> --splititem=<name>,...,<name>
         --splitpod=<name> --libpods=<name>,...,<name> --verbose

    --help      - this message
    --podpath   - colon-separated list of directories containing .pod and
                  .pm files to be converted (. by default).
    --podroot   - filesystem base directory from which all relative paths in
                  podpath stem (default is .).
    --htmldir   - directory to store resulting html files in relative
                  to the filesystem (\$podroot/html by default).
    --htmlroot  - http-server base directory from which all relative paths
                  in podpath stem (default is /).
    --libpods   - comma-separated list of files to search for =item pod
                  directives in as targets of C<> and implicit links (empty
                  by default).
    --norecurse - don't recurse on those subdirectories listed in podpath.
                  (default behavior).
    --recurse   - recurse on those subdirectories listed in podpath
    --splithead - comma-separated list of .pod or .pm files to split.  will
                  split each file into several smaller files at every occurrence
                  of a pod =head[1-6] directive.
    --splititem - comma-separated list of .pod or .pm files to split using
                  splitpod.
    --splitpod  - directory where the program splitpod can be found
                  (\$podroot/pod by default).
    --verbose   - self-explanatory.

END_OF_USAGE
144
# ---- defaults; parse_command_line() replaces them with whatever was
# ---- given on the command line
$podroot   = ".";           # assume the pods we want are here
@podpath   = ( "." );       # directories containing the .pod and .pm
                            # files to be converted
$htmlroot  = "/";           # default URL base
$htmldir   = "";            # filled in below when not given
$splitpod  = "";            # filled in below when not given
@libpods   = ();            # no library pods by default
@splithead = ();            # don't split any files by default
@splititem = ();            # don't split any files by default
$recurse   = 0;             # don't recurse by default
$verbose   = 0;             # whether or not to print debugging info

$pod2html = "pod/pod2html"; # name reported by runpod2html on failure

# nothing at all on the command line: print the usage and give up
unless (@ARGV) {
    usage("");
}

# Overcome shell's p1,..,p8 limitation: on VMS every option arrives packed
# into the first argument, so split it on whitespace.
# See vms/descrip_mms.template -> descrip.mms for invocation.
if ( $^O eq 'VMS' ) {
    @ARGV = split(/\s+/, $ARGV[0]);
}

# parse the command-line into the $opt_* variables, then copy them over
$result = GetOptions(
    'help',
    'podpath=s',
    'podroot=s',
    'htmldir=s',
    'htmlroot=s',
    'libpods=s',
    'recurse!',
    'splithead=s',
    'splititem=s',
    'splitpod=s',
    'verbose',
);
unless ($result) {
    usage("invalid parameters");
}
parse_command_line();


# derive the locations the user didn't specify.
# NOTE(review): the usage text advertises \$podroot/html as the --htmldir
# default, while the value below is built from $htmlroot -- confirm which
# of the two is intended.
$htmldir  ||= "$htmlroot/html";
$splitpod ||= "$podroot/pod";


# make sure that the destination directory exists
unless (-d $htmldir) {
    mkdir($htmldir, 0755)
        || die "$0: cannot make directory $htmldir: $!\n";
}
193
194
# files that have already been processed by splititem or splithead are
# collected here so that the conversion pass below leaves them alone.
@ignore = ();

# directories produced by splitting; files in these won't get an index
@splitdirs = ();


# split pods.  it's important to do this before converting ANY pods
#  because it may affect some of the links
split_on_head($podroot, $htmldir, \@splitdirs, \@ignore, @splithead);
split_on_item($podroot,           \@splitdirs, \@ignore, @splititem);


# convert the pod pages found in each directory of @podpath
for my $poddir (@podpath) {
    installdir($poddir, $recurse, $podroot, \@splitdirs, \@ignore);
}


# now go through and create a master index for each pod split by item
for my $item (@splititem) {
    my $index = "$htmldir/$item.html";
    print "creating index $index\n" if $verbose;
    create_index($index, "$htmldir/$item");
}
222
# For every pod that was split on =head: let pod2html convert the complete
# pod, then cut the resulting page down to its index (everything before the
# first paragraph containing "NAME=") and point the index entries at the
# per-section pages instead of at anchors within the page.
foreach my $dir (@splithead) {
    $dir .= ".pod" unless $dir =~ /(\.pod|\.pm)$/;
    # let pod2html create the file
    runpod2html($dir, 1);

    # now go through and truncate after the index
    $dir =~ /^(.*?)(\.pod|\.pm)?$/sm;
    my $file = "$htmldir/$1";
    print "creating index $file.html\n" if $verbose;

    # read in paragraph mode; local() confines the change to this loop body
    local $/ = "";

    # read in everything until what would have been the first =head
    # directive, patching the index as we go.
    open(H, "<$file.html") ||
        die "$0: error opening $file.html for input: $!\n";
    my @data = ();
    while (<H>) {
        last if /NAME=/;
        # [^"]* rather than .* so that two anchors sharing a line are
        # rewritten one by one instead of being swallowed as a single match
        s{HREF="#([^"]*)">}{
            my $url = "$file/$1.html" ;
            $url = Pod::Html::relativize_url( $url, "$file.html" )
                if ( ! defined $opt_htmlroot || $opt_htmlroot eq '' ) ;
            "HREF=\"$url\">" ;
        }eg;
        push @data, $_;
    }
    close(H);

    # now rewrite the file
    open(H, ">$file.html") ||
        die "$0: error opening $file.html for output: $!\n";
    print H "@data\n";
    # buffered write errors only show up at close time, so check it
    close(H) ||
        die "$0: error closing $file.html: $!\n";
}
257
258##############################################################################
259
260
#
# usage - prints the given complaint (if any arguments were passed) as a
#  warning prefixed with the program name, then dies with the text held
#  in $usage.  Never returns.
#
sub usage {
    my @complaint = @_;

    if (@complaint) {
        warn "$0: @complaint\n";
    }
    die $usage;
}
265
266
#
# parse_command_line - copies the $opt_* variables into the program's
#  settings.  Only options that are defined replace a default.  A defined
#  $opt_help prints the usage via usage() instead.
#
sub parse_command_line {
    if (defined $opt_help) {
        usage();
    }
    $opt_help = "";                 # make -w shut up

    # colon-separated list of directories
    if (defined $opt_podpath) {
        @podpath = split(":", $opt_podpath);
    }

    # comma-separated lists of files
    if (defined $opt_splithead) {
        @splithead = split(",", $opt_splithead);
    }
    if (defined $opt_splititem) {
        @splititem = split(",", $opt_splititem);
    }
    if (defined $opt_libpods) {
        @libpods = split(",", $opt_libpods);
    }

    # plain values and flags: [ where it goes, what was given ]
    for my $pair (
        [ \$htmldir,  $opt_htmldir  ],
        [ \$htmlroot, $opt_htmlroot ],
        [ \$podroot,  $opt_podroot  ],
        [ \$splitpod, $opt_splitpod ],
        [ \$recurse,  $opt_recurse  ],
        [ \$verbose,  $opt_verbose  ],
    ) {
        my ($setting, $given) = @$pair;
        $$setting = $given if defined $given;
    }
}
287
288
#
# absolute_path - returns $path unchanged when it starts with a "/",
#  otherwise returns it prefixed with "$cwd/".
#
sub absolute_path {
    my($cwd, $path) = @_;

    # already anchored at the root: nothing to add
    return $path if $path =~ m:^/:;

    # everything else is taken relative to $cwd
    return "$cwd/$path";
}
296
297
#
# create_index - writes $html, a master index holding one <DT>/<DD> entry
#  for each .html (or .htm) file found in $dir.  The name and description
#  of an entry are pulled out of the first paragraph of that file which
#  contains "NAME=".  Files from which no name can be extracted are left
#  out of the index.  The link is "$dir/$file", made relative to $html
#  when $opt_htmlroot is undefined or empty.
#
#  Dies if $dir or one of its files cannot be opened, or if $html cannot
#  be opened or closed.
#
sub create_index {
    my($html, $dir) = @_;
    my(@files, @filedata, $file);
    my($name, $lcp1, $lcp2, $url);

    # the html files are read a paragraph at a time; local() restores the
    # caller's input record separator on return
    local $/ = "";

    # get the list of .html files in this directory
    opendir(DIR, $dir) ||
        die "$0: error opening directory $dir for reading: $!\n";
    @files = sort(grep(/\.html?$/, readdir(DIR)));
    closedir(DIR);

    open(HTML, ">$html") ||
        die "$0: error opening $html for output: $!\n";

    # for each .html file in the directory, extract its name and
    #   description and throw them into the big index.
    print HTML "<DL COMPACT>\n";
    foreach $file (@files) {
        open(IN, "<$dir/$file") ||
            die "$0: error opening $dir/$file for input: $!\n";
        @filedata = <IN>;
        close(IN);

        # pull out the NAME section; a file without one has no entry
        ($name) = grep(/NAME=/, @filedata);
        next unless defined $name;

        ($lcp1,$lcp2) = ($name =~ m,/H1>\s(\S+)\s[\s-]*(.*?)\s*$,sm);
        if (defined $lcp1 and $lcp1 eq '<P>') { # Uninteresting.  Try again.
           ($lcp1,$lcp2) = ($name =~ m,/H1>\s<P>\s(\S+)\s[\s-]*(.*?)\s*$,sm);
        }

        # nothing to show without a name; printing the anchor anyway
        # would leave it unclosed
        next unless defined $lcp1;

        $url = "$dir/$file";
        if ( ! defined $opt_htmlroot || $opt_htmlroot eq '' ) {
            $url = Pod::Html::relativize_url( "$dir/$file", $html ) ;
        }

        # the anchor goes inside the <DT>, not around it
        print HTML qq(<DT><A HREF="$url">$lcp1</A><DD>$lcp2\n);
    }
    print HTML "</DL>\n";

    # buffered write errors only show up at close time, so check it
    close(HTML) ||
        die "$0: error closing $html: $!\n";
}
354
355
#
# split_on_head - hands every pod named in @splithead to splitpod, which
#  splits it on =head[1-6] directives, and records the unsplit pod in
#  @$ignore so that it is not converted as a whole.
#
sub split_on_head {
    my($podroot, $htmldir, $splitdirs, $ignore, @splithead) = @_;

    print "splitting files by head.\n" if $verbose && @splithead;

    for my $pod (@splithead) {
        # a name without any "/" gets one in front, so that the pattern
        # below always finds a (possibly empty) directory part
        $pod =~ s,^([^/]*)$,/$1,;

        # directory: everything before the first "/"; file: whatever
        # follows, with a trailing ".pod" dropped and then put back on
        $pod =~ m,(.*?)/(.*?)(\.pod)?$,;
        my $poddir  = "$podroot/$1";
        my $podfile = "$poddir/$2.pod";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, $podfile);

        # split the pod
        splitpod($podfile, $poddir, $htmldir, $splitdirs);
    }
}
377
378
#
# split_on_item - runs the external splitpod program (looked for in the
#  directory named by $splitpod) on every pod named in @splititem.  Each
#  pod "dir/name" is split inside the directory "$podroot/dir/name", which
#  is created when missing and recorded in @$splitdirs; the unsplit pod is
#  recorded in @$ignore so that it is not converted as a whole.
#
#  Dies if a directory cannot be created or entered, if the splitpod
#  program is not found, or if the starting directory cannot be re-entered.
#  A failing splitpod run only produces a warning.
#
sub split_on_item {
    my($podroot, $splitdirs, $ignore, @splititem) = @_;
    my($pwd, $pod, $dirname, $filename);

    print "splitting files by item.\n" if $verbose && $#splititem >= 0;
    $pwd = getcwd();
    my $splitter = absolute_path($pwd, "$splitpod/splitpod");
    foreach $pod (@splititem) {
        # figure out the directory to split into
        $pod      =~ s,^([^/]*)$,/$1,;
        $pod      =~ m,(.*?)/(.*?)(\.pod)?$,;
        $dirname  = "$1/$2";
        $filename = "$2.pod";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, "$podroot/$dirname.pod");

        # split the pod
        push(@$splitdirs, "$podroot/$dirname");
        if (! -d "$podroot/$dirname") {
            mkdir("$podroot/$dirname", 0755) ||
                    die "$0: error creating directory $podroot/$dirname: $!\n";
        }
        chdir("$podroot/$dirname") ||
            die "$0: error changing to directory $podroot/$dirname: $!\n";
        die "$splitter not found. Use '-splitpod dir' option.\n"
            unless -f $splitter;
        system("perl", $splitter, "../$filename") &&
            warn "$0: error running '$splitter ../$filename'"
                 ." from $podroot/$dirname";

        # go back to where we started before the next pod is handled:
        # $podroot may be a relative path, and that only resolves
        # correctly from the original directory
        chdir($pwd) ||
            die "$0: error changing back to directory $pwd: $!\n";
    }
}
412
413
#
# splitpod - splits a .pod file into several smaller .pod files
#  where a new file is started each time a =head[1-6] pod directive
#  is encountered in the input file.
#
#  $pod       - the pod file to split
#  $poddir    - directory in which the directory for the pieces is created;
#               that directory is named after $pod minus its .pod suffix
#  $htmldir   - accepted but not used here
#  $splitdirs - reference to the list of split directories; the new
#               directory is appended to it
#
#  Everything before the first =head directive is dropped.  L<> links
#  naming one of the file's own sections are redirected into the new
#  directory, and every piece gets a small list of back/forward/up links
#  appended.  Dies when a file or the directory cannot be handled.
#
sub splitpod {
    my($pod, $poddir, $htmldir, $splitdirs) = @_;
    my(@poddata, @filedata, %heads);
    my($file, $dir, $i, $j, $prevsec, $section, $nextsec);

    print "splitting $pod\n" if $verbose;

    # read the file in paragraphs; local() keeps paragraph mode from
    # leaking into the rest of the program
    local $/ = "";
    open(SPLITIN, "<$pod") ||
        die "$0: error opening $pod for input: $!\n";
    @filedata = <SPLITIN>;
    close(SPLITIN) ||
        die "$0: error closing $pod: $!\n";

    # regroup the paragraphs into =head[1-6] sections
    @poddata = ();
    for ($i = 0, $j = -1; $i <= $#filedata; $i++) {
        $j++ if ($filedata[$i] =~ /^\s*=head[1-6]/);
        if ($j >= 0) {
            $poddata[$j]  = "" unless defined $poddata[$j];
            $poddata[$j] .= "\n$filedata[$i]";
        }
    }

    # create list of =head[1-6] sections so that we can rewrite
    #  L<> links as necessary.
    %heads = ();
    foreach $i (0..$#poddata) {
        $heads{htmlize($1)} = 1 if $poddata[$i] =~ /=head[1-6]\s+(.*)/;
    }

    # create a directory of a similar name and store all the
    #  files in there
    $pod =~ s,.*/(.*),$1,;      # get the last part of the name
    $dir = $pod;
    $dir =~ s/\.pod$//;         # only the suffix, not a ".pod" inside the name
    push(@$splitdirs, "$poddir/$dir");
    mkdir("$poddir/$dir", 0755) ||
        die "$0: could not create directory $poddir/$dir: $!\n"
        unless -d "$poddir/$dir";

    # a pod without any =head directive has no sections to write
    return unless @poddata;

    # section titles are taken from $1 only when the match succeeded, so
    # that a stale capture from an earlier match is never used
    $section    = "";
    $nextsec    = ($poddata[0] =~ /^\s*=head[1-6]\s+(.*)/) ? $1 : "";

    # for each section of the file create a separate pod file
    for ($i = 0; $i <= $#poddata; $i++) {
        # determine the "prev" and "next" links
        $prevsec = $section;
        $section = $nextsec;
        if ($i < $#poddata) {
            $nextsec = ($poddata[$i+1] =~ /^\s*=head[1-6]\s+(.*)/) ? $1 : "";
        } else {
            $nextsec = "";
        }

        # determine an appropriate filename (this must correspond with
        #  what pod2html will try and guess)
        $file = "$dir/" . htmlize($section) . ".pod";

        # create the new .pod file
        print "\tcreating $poddir/$file\n" if $verbose;
        open(SPLITOUT, ">$poddir/$file") ||
            die "$0: error opening $poddir/$file for output: $!\n";
        # links to sections of this very pod now live in the new directory
        $poddata[$i] =~ s,L<([^<>]*)>,
                            defined $heads{htmlize($1)} ? "L<$dir/$1>" : "L<$1>"
                         ,ge;
        print SPLITOUT $poddata[$i]."\n\n";
        print SPLITOUT "=over 4\n\n";
        print SPLITOUT "=item *\n\nBack to L<$dir/\"$prevsec\">\n\n" if $prevsec;
        print SPLITOUT "=item *\n\nForward to L<$dir/\"$nextsec\">\n\n" if $nextsec;
        print SPLITOUT "=item *\n\nUp to L<$dir>\n\n";
        print SPLITOUT "=back\n\n";
        close(SPLITOUT) ||
            die "$0: error closing $poddir/$file: $!\n";
    }
}
499
500
#
# installdir - takes care of converting the .pod and .pm files in the
#  directory $podroot/$dir to .html files (via runpod2html) and, when
#  $recurse is true, does the same for all of its subdirectories.
#
#  $dir       - directory to convert, relative to $podroot
#  $recurse   - true to descend into subdirectories
#  $podroot   - base directory of the pods
#  $splitdirs - reference to the list of directories created by splitting;
#               files in such a directory are converted without an index
#  $ignore    - reference to the list of files to skip.  When both
#               name.pod and name.pm exist, the .pod is converted and the
#               .pm is added to this list.
#
#  Dies if the directory cannot be opened.
#
sub installdir {
    my($dir, $recurse, $podroot, $splitdirs, $ignore) = @_;
    my(@entries, @dirlist, @podlist, @pmlist, $doindex);
    my($subdir, $pod, $pm);

    @dirlist = ();      # directories to recurse on
    @podlist = ();      # .pod files to install
    @pmlist  = ();      # .pm files to install

    # should files in this directory get an index?
    $doindex = (grep($_ eq "$podroot/$dir", @$splitdirs) ? 0 : 1);

    # read the directory once; the handle is closed again before we
    # recurse, because DIR is shared by every call
    opendir(DIR, "$podroot/$dir")
        || die "$0: error opening directory $podroot/$dir: $!\n";
    @entries = readdir(DIR);
    closedir(DIR);

    # find the directories to recurse on
    @dirlist = map { if ($^O eq 'VMS') {/^(.*)\.dir$/i; "$dir/$1";} else {"$dir/$_";}}
        grep(-d "$podroot/$dir/$_" && !/^\.{1,2}/, @entries) if $recurse;

    # find all the .pod files within the directory
    @podlist = map { /^(.*)\.pod$/; "$dir/$1" }
        grep(! -d "$podroot/$dir/$_" && /\.pod$/, @entries);

    # find all the .pm files within the directory
    @pmlist = map { /^(.*)\.pm$/; "$dir/$1" }
        grep(! -d "$podroot/$dir/$_" && /\.pm$/, @entries);

    # recurse on all subdirectories we kept track of
    foreach $subdir (@dirlist) {
        installdir($subdir, $recurse, $podroot, $splitdirs, $ignore);
    }

    # install all the pods we found
    foreach $pod (@podlist) {
        # check if we should ignore it.
        next if grep($_ eq "$podroot/$pod.pod", @$ignore);

        # check if a .pm file exists too.  @pmlist holds the names
        # without their suffix, just like @podlist does.
        if (grep($_ eq $pod, @pmlist)) {
            print  "$0: Warning both `$podroot/$pod.pod' and "
                . "`$podroot/$pod.pm' exist, using pod\n";
            push(@$ignore, "$pod.pm");
        }
        runpod2html("$pod.pod", $doindex);
    }

    # install all the .pm files we found
    foreach $pm (@pmlist) {
        # check if we should ignore it.
        next if grep($_ eq "$pm.pm", @$ignore);

        runpod2html("$pm.pm", $doindex);
    }
}
562
563
#
# runpod2html - invokes Pod::Html::pod2html to convert a .pod or .pm file
#  to a .html file.
#
#  $pod     - file to convert, relative to $podroot; the result is written
#             to the same relative name under $htmldir, with the suffix
#             replaced by .html.  Missing directories below $htmldir are
#             created first.
#  $doindex - true to ask pod2html for an index (--index), false for
#             --noindex
#
#  Dies if a directory cannot be created or if $? is set after the
#  conversion.
#
sub runpod2html {
    my($pod, $doindex) = @_;
    my($html, $i, $dir, @dirs, @args);

    $html = $pod;
    $html =~ s/\.(pod|pm)$/.html/g;

    # make sure the destination directories exist
    @dirs = split("/", $html);
    $dir  = "$htmldir/";
    for ($i = 0; $i < $#dirs; $i++) {
        if (! -d "$dir$dirs[$i]") {
            mkdir("$dir$dirs[$i]", 0755) ||
                die "$0: error creating directory $dir$dirs[$i]: $!\n";
        }
        $dir .= "$dirs[$i]/";
    }

    # put the pod2html options together; --libpods is only passed when
    # there are library pods, rather than as an empty argument
    @args = (
        "--htmldir=$htmldir",
        "--htmlroot=$htmlroot",
        "--podpath=".join(":", @podpath),
        "--podroot=$podroot", "--netscape",
        ($doindex ? "--index" : "--noindex"),
        "--" . ($recurse ? "" : "no") . "recurse",
    );
    push(@args, "--libpods=" . join(":", @libpods)) if @libpods;
    push(@args, "--infile=$podroot/$pod", "--outfile=$htmldir/$html");

    # invoke pod2html
    print "$podroot/$pod => $htmldir/$html\n" if $verbose;

    # pod2html is an ordinary subroutine call, so $? still holds the status
    # of whatever child process ran last (the splitpod run, for one).
    # Clear it so that the check below cannot trip over a stale value.
    $? = 0;
    Pod::Html::pod2html(@args);
    die "$0: error running $pod2html: $!\n" if $?;
}
601
#
# htmlize - shorthand for htmlify with 0 as its first argument; all
#  arguments are passed on as they are and htmlify's result is returned.
#
sub htmlize {
    return htmlify(0, @_);
}
Note: See TracBrowser for help on using the repository browser.