source: trunk/third/perl/installhtml @ 18450

Revision 18450, 18.2 KB checked in by zacheiss, 21 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18449, which included commits to RCS files with non-trunk default branches.
  • Property svn:executable set to *
Line 
1#!./perl -Ilib -w
2
3# This file should really be extracted from a .PL file
4
5use strict;
6use Config;             # for config options in the makefile
7use Getopt::Long;       # for command-line parsing
8use Cwd;
9use Pod::Html;
10
11=head1 NAME
12
13installhtml - converts a collection of POD pages to HTML format.
14
15=head1 SYNOPSIS
16
17    installhtml  [--help] [--podpath=<name>:...:<name>] [--podroot=<name>]
18         [--htmldir=<name>] [--htmlroot=<name>]  [--norecurse] [--recurse]
19         [--splithead=<name>,...,<name>]   [--splititem=<name>,...,<name>]
20         [--libpods=<name>,...,<name>]  [--verbose]
21
22=head1 DESCRIPTION
23
24I<installhtml> converts a collection of POD pages to a corresponding
25collection of HTML pages.  This is primarily used to convert the pod
26pages found in the perl distribution.
27
28=head1 OPTIONS
29
30=over 4
31
32=item B<--help> help
33
34Displays the usage.
35
36=item B<--podroot> POD search path base directory
37
38The base directory to search for all .pod and .pm files to be converted.
39Default is current directory.
40
41=item B<--podpath> POD search path
42
43The list of directories to search for .pod and .pm files to be converted.
44Default is `podroot/.'.
45
46=item B<--recurse> recurse on subdirectories
47
48Whether or not to convert all .pm and .pod files found in subdirectories
49too.  Default is to not recurse.
50
51=item B<--htmldir> HTML destination directory
52
53The base directory which all HTML files will be written to.  This should
54be a path relative to the filesystem, not the resulting URL.
55
56=item B<--htmlroot> URL base directory
57
58The base directory which all resulting HTML files will be visible at in
59a URL.  The default is `/'.
60
61=item B<--splithead> POD files to split on =head directive
62
63Comma-separated list of pod files to split by the =head directive.  The
64.pod suffix is optional. These files should have names specified
65relative to podroot.
66
67=item B<--splititem> POD files to split on =item directive
68
69Comma-separated list of all pod files to split by the =item directive.
70The .pod suffix is optional.  I<installhtml> does not do the actual
71split, rather it invokes I<splitpod> to do the dirty work.  As with
72--splithead, these files should have names specified relative to podroot.
73
74=item B<--splitpod> Directory containing the splitpod program
75
76The directory containing the splitpod program. The default is `podroot/pod'.
77
78=item B<--libpods> library PODs for LE<lt>E<gt> links
79
80Comma-separated list of "library" pod files.  This is the same list that
81will be passed to pod2html when any pod is converted.
82
83=item B<--verbose> verbose output
84
85Self-explanatory.
86
87=back
88
89=head1 EXAMPLE
90
91The following command-line is an example of the one we use to convert
92perl documentation:
93
94    ./installhtml --podpath=lib:ext:pod:vms   \
95                        --podroot=/usr/src/perl     \
96                        --htmldir=/perl/nmanual     \
97                        --htmlroot=/perl/nmanual    \
98                        --splithead=pod/perlipc     \
99                        --splititem=pod/perlfunc    \
100                        --libpods=perlfunc,perlguts,perlvar,perlrun,perlop \
101                        --recurse \
102                        --verbose
103
104=head1 AUTHOR
105
106Chris Hall E<lt>hallc@cs.colorado.eduE<gt>
107
108=head1 TODO
109
110=cut
111
# Help text shown by usage().  The program name ($0) is interpolated at
# the point of assignment; the \$podroot references are kept literal.
my $usage = <<END_OF_USAGE;
Usage: $0 --help --podpath=<name>:...:<name> --podroot=<name>
         --htmldir=<name> --htmlroot=<name> --norecurse --recurse
         --splithead=<name>,...,<name> --splititem=<name>,...,<name>
         --libpods=<name>,...,<name> --verbose

    --help      - this message
    --podpath   - colon-separated list of directories containing .pod and
                  .pm files to be converted (. by default).
    --podroot   - filesystem base directory from which all relative paths in
                  podpath stem (default is .).
    --htmldir   - directory to store resulting html files in relative
                  to the filesystem (\$podroot/html by default).
    --htmlroot  - http-server base directory from which all relative paths
                  in podpath stem (default is /).
    --libpods   - comma-separated list of files to search for =item pod
                  directives in as targets of C<> and implicit links (empty
                  by default).
    --norecurse - don't recurse on those subdirectories listed in podpath.
                  (default behavior).
    --recurse   - recurse on those subdirectories listed in podpath
    --splithead - comma-separated list of .pod or .pm files to split.  will
                  split each file into several smaller files at every occurrence
                  of a pod =head[1-6] directive.
    --splititem - comma-separated list of .pod or .pm files to split using
                  splitpod.
    --splitpod  - directory where the program splitpod can be found
                  (\$podroot/pod by default).
    --verbose   - self-explanatory.

END_OF_USAGE
145
# Conversion settings.  Each one starts at its built-in default and may be
# overridden from the command line by parse_command_line().
my @libpods   = ();             # "library" pods passed on via --libpods
my @podpath   = ( "." );        # directories containing the .pod and .pm
                                # files to be converted
my $podroot   = ".";            # assume the pods we want are here
my $htmldir   = "";             # nothing for now...
my $htmlroot  = "/";            # default value
my $recurse   = 0;              # default behavior
my @splithead = ();             # don't split any files by default
my @splititem = ();             # don't split any files by default
my $splitpod  = "";             # nothing for now.
my $verbose   = 0;              # whether or not to print debugging info
my $pod2html  = "pod/pod2html"; # name quoted in runpod2html's error message

# called without any arguments: show the help text and stop
usage("") unless @ARGV;
165
# Overcome shell's p1,..,p8 limitation.
# See vms/descrip_mms.template -> descrip.mms for invocation.
if ( $^O eq 'VMS' ) { @ARGV = split(/\s+/,$ARGV[0]); }

use vars qw( %Options );

# parse the command-line; the raw values land in %Options and are then
# copied into the settings variables by parse_command_line()
my $result = GetOptions( \%Options, qw(
        help
        podpath=s
        podroot=s
        htmldir=s
        htmlroot=s
        libpods=s
        recurse!
        splithead=s
        splititem=s
        splitpod=s
        verbose
));
usage("invalid parameters") unless $result;
parse_command_line();


# set these variables to appropriate values if the user didn't specify
#  values for them.  $htmldir is a filesystem path, so its default hangs
#  off $podroot (as the usage text promises) and not off the URL base
#  $htmlroot, which would yield "//html" with the default settings.
$htmldir = "$podroot/html" unless $htmldir;
$splitpod = "$podroot/pod" unless $splitpod;
194
195
# make sure that the destination directory exists
unless (-d $htmldir) {
    mkdir($htmldir, 0755)
        || die "$0: cannot make directory $htmldir: $!\n";
}


# @ignore collects the files that must be skipped by the conversion pass
# because they have already been processed by splititem or splithead.
# @splitdirs collects the directories whose files won't get an index.
my @ignore    = ();
my @splitdirs = ();

# split pods.  it's important to do this before converting ANY pods
#  because it may affect some of the links
split_on_head($podroot, $htmldir, \@splitdirs, \@ignore, @splithead);
split_on_item($podroot,           \@splitdirs, \@ignore, @splititem);


# convert the pod pages found in each directory of @podpath
#warn "converting files\n" if $verbose;
#warn "\@ignore\t= @ignore\n" if $verbose;
foreach my $dir (@podpath) {
    installdir($dir, $recurse, $podroot, \@splitdirs, \@ignore);
}
221
222
# now go through and create master indices for each pod we split
foreach my $dir (@splititem) {
    print "creating index $htmldir/$dir.html\n" if $verbose;
    create_index("$htmldir/$dir.html", "$htmldir/$dir");
}

# for each pod split by =head, convert the unsplit pod and then cut the
# resulting page down to its index, pointing every index entry at the
# page of the corresponding section.
foreach my $dir (@splithead) {
    $dir .= ".pod" unless $dir =~ /(\.pod|\.pm)$/;
    # let pod2html create the file
    runpod2html($dir, 1);

    # now go through and truncate after the index
    $dir =~ /^(.*?)(\.pod|\.pm)?$/sm;
    my $file = "$htmldir/$1";
    print "creating index $file.html\n" if $verbose;

    # read in everything until what would have been the first =head
    # directive, patching the index as we go.
    my @data = ();
    {
        # paragraph mode, confined to this read so that the record
        # separator seen by the rest of the program is left alone
        local $/ = "";
        open(my $in, '<', "$file.html") ||
            die "$0: error opening $file.html for input: $!\n";
        while (my $para = <$in>) {
            last if $para =~ /NAME=/;
            $para =~ s{HREF="#(.*)">}{
                my $url = "$file/$1.html" ;
                $url = Pod::Html::relativize_url( $url, "$file.html" )
                    if ( ! defined $Options{htmlroot} || $Options{htmlroot} eq '' );
                "HREF=\"$url\">" ;
            }eg;
            push @data, $para;
        }
        close($in);
    }

    # now rewrite the file; buffered write errors only surface at close,
    # so that is checked too
    open(my $out, '>', "$file.html") ||
        die "$0: error opening $file.html for output: $!\n";
    print $out "@data", "\n";
    close($out) ||
        die "$0: error closing $file.html: $!\n";
}
263
264##############################################################################
265
266
#
# usage - prints the optional complaint passed in (prefixed with the
#  program name) as a warning, then dies with the help text.
#
sub usage {
    my @complaint = @_;

    if (@complaint) {
        warn "$0: @complaint\n";
    }
    die $usage;
}
271
272
#
# parse_command_line - copies the values GetOptions left in %Options
#  into the settings variables.  Settings whose option was not given
#  keep their defaults.  Shows the usage and exits when --help was given.
#
sub parse_command_line {
    usage() if defined $Options{help};
    $Options{help} = "";                    # make -w shut up

    # list-valued options: option name, separator, destination array
    my @list_options = (
        [ 'podpath',   ":", \@podpath   ],  # list of directories
        [ 'splithead', ",", \@splithead ],  # lists of files
        [ 'splititem', ",", \@splititem ],
        [ 'libpods',   ",", \@libpods   ],
    );
    foreach my $spec (@list_options) {
        my($name, $separator, $target) = @$spec;
        next unless defined $Options{$name};
        @$target = split($separator, $Options{$name});
    }

    # single-valued options: option name, destination scalar
    my @scalar_options = (
        [ 'htmldir',  \$htmldir  ],
        [ 'htmlroot', \$htmlroot ],
        [ 'podroot',  \$podroot  ],
        [ 'splitpod', \$splitpod ],
        [ 'recurse',  \$recurse  ],
        [ 'verbose',  \$verbose  ],
    );
    foreach my $spec (@scalar_options) {
        my($name, $target) = @$spec;
        next unless defined $Options{$name};
        $$target = $Options{$name};
    }
}
293
294
#
# absolute_path - returns $path unchanged when it already starts with a
#  slash, and "$cwd/$path" otherwise.
#
sub absolute_path {
    my($cwd, $path) = @_;

    return $path if substr($path, 0, 1) eq '/';
    return "$cwd/$path";
}
302
303
#
# create_index - writes $html, a master index for the .htm/.html files
#  found in directory $dir.  Each file contributes one <DT>/<DD> entry
#  made of the name and the description taken from its NAME section,
#  linked to the file.  The link is made relative to $html when no
#  --htmlroot was given.  Files in which no name can be found are left
#  out of the index.
#
sub create_index {
    my($html, $dir) = @_;

    # get the list of .html files in this directory
    opendir(my $dh, $dir) ||
        die "$0: error opening directory $dir for reading: $!\n";
    my @files = sort(grep(/\.html?$/, readdir($dh)));
    closedir($dh);

    open(my $out, '>', $html) ||
        die "$0: error opening $html for output: $!\n";

    # the pages are read a paragraph at a time; keep that setting from
    # leaking out of this subroutine
    local $/ = "";

    # for each .html file in the directory, extract the name and the
    #   description and throw them into the big index.
    print $out "<DL COMPACT>\n";
    foreach my $file (@files) {
        open(my $in, '<', "$dir/$file") ||
            die "$0: error opening $dir/$file for input: $!\n";
        my @filedata = <$in>;
        close($in);

        # pull out the NAME section
        my($name) = grep(/NAME=/, @filedata);
        next unless defined $name;

        my($lcp1,$lcp2) = ($name =~ m,/H1>\s(\S+)\s[\s-]*(.*?)\s*$,sm);
        if (defined $lcp1 and $lcp1 eq '<P>') { # Uninteresting.  Try again.
           ($lcp1,$lcp2) = ($name =~ m,/H1>\s<P>\s(\S+)\s[\s-]*(.*?)\s*$,sm);
        }

        # without a name there is nothing to show; skipping the file
        # avoids emitting an <A> tag that is never closed
        next unless defined $lcp1;

        my $url = "$dir/$file";
        if ( ! defined $Options{htmlroot} || $Options{htmlroot} eq '' ) {
            $url = Pod::Html::relativize_url( "$dir/$file", $html ) ;
        }

        print $out qq(<A HREF="$url">);
        print $out "<DT>$lcp1</A><DD>$lcp2\n";
    }
    print $out "</DL>\n";

    close($out) ||
        die "$0: error closing $html: $!\n";
}
361
362
#
# split_on_head - splits each pod named in @splithead (relative to
#  $podroot, .pod suffix optional) on its =head[1-6] directives by
#  handing it to splitpod(), and records the unsplit file in @$ignore so
#  that it is not converted as a whole.
#
sub split_on_head {
    my($podroot, $htmldir, $splitdirs, $ignore, @splithead) = @_;

    print "splitting files by head.\n" if $verbose && @splithead;

    foreach my $pod (@splithead) {
        # figure out the directory name and filename; a name without
        # any directory part is given an empty one
        $pod = "/$pod" unless $pod =~ m,/,;
        $pod =~ m,(.*?)/(.*?)(\.pod)?$,;
        my($dirname, $filename) = ($1, "$2.pod");
        my $source = "$podroot/$dirname/$filename";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, $source);

        # split the pod
        splitpod($source, "$podroot/$dirname", $htmldir, $splitdirs);
    }
}
384
385
#
# split_on_item - splits each pod named in @splititem (relative to
#  $podroot, .pod suffix optional) by running the external splitpod
#  program inside a directory named after the pod.  That directory is
#  created if necessary and appended to @$splitdirs; the unsplit file is
#  appended to @$ignore so that it is not converted as a whole.
#
#  Dies when a directory cannot be created or entered, or when the
#  splitpod program is missing; a failing splitpod run only warns.
#
sub split_on_item {
    my($podroot, $splitdirs, $ignore, @splititem) = @_;
    my($pwd, $dirname, $filename);

    print "splitting files by item.\n" if $verbose && $#splititem >= 0;
    $pwd = getcwd();
    my $splitter = absolute_path($pwd, "$splitpod/splitpod");
    foreach my $pod (@splititem) {
        # figure out the directory to split into
        $pod      =~ s,^([^/]*)$,/$1,;
        $pod      =~ m,(.*?)/(.*?)(\.pod)?$,;
        $dirname  = "$1/$2";
        $filename = "$2.pod";

        # since we are splitting this file it shouldn't be converted.
        push(@$ignore, "$podroot/$dirname.pod");

        # split the pod
        push(@$splitdirs, "$podroot/$dirname");
        if (! -d "$podroot/$dirname") {
            mkdir("$podroot/$dirname", 0755) ||
                    die "$0: error creating directory $podroot/$dirname: $!\n";
        }
        chdir("$podroot/$dirname") ||
            die "$0: error changing to directory $podroot/$dirname: $!\n";
        die "$splitter not found. Use '-splitpod dir' option.\n"
            unless -f $splitter;
        system("perl", $splitter, "../$filename") &&
            warn "$0: error running '$splitter ../$filename'"
                 ." from $podroot/$dirname";

        # go back to where we started before handling the next pod:
        # $podroot may be a relative path, which only resolves correctly
        # from the original working directory
        chdir($pwd) ||
            die "$0: error changing to directory $pwd: $!\n";
    }
}
419
420
#
# splitpod - splits a .pod file into several smaller .pod files
#  where a new file is started each time a =head[1-6] pod directive
#  is encountered in the input file.
#
#  $pod       - path of the .pod file to split
#  $poddir    - directory in which the directory holding the pieces
#               (named after the pod, without its .pod suffix) is created
#  $htmldir   - accepted but not used by this subroutine
#  $splitdirs - array ref; the directory holding the pieces is appended
#
#  Text in front of the first =head directive is not written anywhere.
#  Every piece ends with a list of links back, forward and up.
#
sub splitpod {
    my($pod, $poddir, $htmldir, $splitdirs) = @_;

    print "splitting $pod\n" if $verbose;

    # read the file in paragraphs; paragraph mode is confined to this
    # read so that the caller's record separator is left alone
    my @filedata;
    {
        local $/ = "";
        open(my $in, '<', $pod) ||
            die "$0: error opening $pod for input: $!\n";
        @filedata = <$in>;
        close($in) ||
            die "$0: error closing $pod: $!\n";
    }

    # regroup the paragraphs by =head[1-6] sections
    my @poddata = ();
    foreach my $para (@filedata) {
        push(@poddata, "") if $para =~ /^\s*=head[1-6]/;
        $poddata[-1] .= "\n$para" if @poddata;
    }

    # the heading text of every section; empty when the directive
    # carries no text, rather than whatever an earlier match captured
    my @titles = ();
    foreach my $chunk (@poddata) {
        push(@titles, $chunk =~ /^\s*=head[1-6]\s+(.*)/ ? $1 : "");
    }

    # create list of =head[1-6] sections so that we can rewrite
    #  L<> links as necessary.
    my %heads = ();
    foreach my $chunk (@poddata) {
        $heads{htmlize($1)} = 1 if $chunk =~ /=head[1-6]\s+(.*)/;
    }

    # create a directory of a similar name and store all the
    #  files in there
    $pod =~ s,.*/(.*),$1,;      # get the last part of the name
    my $dir = $pod;
    $dir =~ s/\.pod$//;         # drop the suffix only, not every ".pod"
    push(@$splitdirs, "$poddir/$dir");
    unless (-d "$poddir/$dir") {
        mkdir("$poddir/$dir", 0755) ||
            die "$0: could not create directory $poddir/$dir: $!\n";
    }

    # for each section of the file create a separate pod file
    foreach my $i (0 .. $#poddata) {
        # determine the "prev" and "next" links
        my $prevsec = $i > 0         ? $titles[$i-1] : "";
        my $section = $titles[$i];
        my $nextsec = $i < $#poddata ? $titles[$i+1] : "";

        # determine an appropriate filename (this must correspond with
        #  what pod2html will try and guess)
        my $file = "$dir/" . htmlize($section) . ".pod";

        # create the new .pod file
        print "\tcreating $poddir/$file\n" if $verbose;
        open(my $out, '>', "$poddir/$file") ||
            die "$0: error opening $poddir/$file for output: $!\n";

        # links to sections of this pod now have to name the directory
        $poddata[$i] =~ s,L<([^<>]*)>,
                            defined $heads{htmlize($1)} ? "L<$dir/$1>" : "L<$1>"
                         ,ge;
        print $out $poddata[$i]."\n\n";
        print $out "=over 4\n\n";
        print $out "=item *\n\nBack to L<$dir/\"$prevsec\">\n\n" if $prevsec;
        print $out "=item *\n\nForward to L<$dir/\"$nextsec\">\n\n" if $nextsec;
        print $out "=item *\n\nUp to L<$dir>\n\n";
        print $out "=back\n\n";
        close($out) ||
            die "$0: error closing $poddir/$file: $!\n";
    }
}
506
507
#
# installdir - takes care of converting the .pod and .pm files in the
#  given directory to .html files and then installing those.
#
#  $dir       - directory to convert, relative to $podroot
#  $recurse   - true to convert the subdirectories (other than those
#               whose name starts with a dot) as well
#  $podroot   - base directory that $dir is relative to
#  $splitdirs - array ref of directories whose files get no index
#  $ignore    - array ref of files to skip; a .pm file that sits next to
#               a .pod file of the same name is appended to it, so that
#               only the .pod is converted
#
sub installdir {
    my($dir, $recurse, $podroot, $splitdirs, $ignore) = @_;

    # should files in this directory get an index?
    my $doindex = (grep($_ eq "$podroot/$dir", @$splitdirs) ? 0 : 1);

    # read the directory once; the lists below are all derived from it
    opendir(my $dh, "$podroot/$dir")
        || die "$0: error opening directory $podroot/$dir: $!\n";
    my @entries = readdir($dh);
    closedir($dh);

    # find the directories to recurse on
    my @dirlist = ();
    @dirlist = map { if ($^O eq 'VMS') {/^(.*)\.dir$/i; "$dir/$1";} else {"$dir/$_";}}
        grep(-d "$podroot/$dir/$_" && !/^\.{1,2}/, @entries) if $recurse;

    # find all the .pod and .pm files within the directory; both lists
    # hold the names without their suffix
    my @files   = grep(! -d "$podroot/$dir/$_", @entries);
    my @podlist = map { /^(.*)\.pod$/; "$dir/$1" } grep(/\.pod$/, @files);
    my @pmlist  = map { /^(.*)\.pm$/;  "$dir/$1" } grep(/\.pm$/,  @files);

    # recurse on all subdirectories we kept track of
    foreach my $subdir (@dirlist) {
        installdir($subdir, $recurse, $podroot, $splitdirs, $ignore);
    }

    # install all the pods we found
    foreach my $pod (@podlist) {
        # check if we should ignore it.
        next if grep($_ eq "$podroot/$pod.pod", @$ignore);

        # check if a .pm file exists too; @pmlist entries carry no
        # suffix, so they are compared with the bare name
        if (grep($_ eq $pod, @pmlist)) {
            print  "$0: Warning both `$podroot/$pod.pod' and "
                . "`$podroot/$pod.pm' exist, using pod\n";
            push(@$ignore, "$pod.pm");
        }
        runpod2html("$pod.pod", $doindex);
    }

    # install all the .pm files we found
    foreach my $pm (@pmlist) {
        # check if we should ignore it.
        next if grep($_ eq "$pm.pm", @$ignore);

        runpod2html("$pm.pm", $doindex);
    }
}
569
570
#
# runpod2html - invokes pod2html to convert a .pod or .pm file to a .html
#  file.
#
#  $pod     - file to convert, relative to $podroot
#  $doindex - true if the page is to get an index
#
#  The page is written below $htmldir under the same relative name with
#  the suffix replaced by .html; missing directories are created first.
#
sub runpod2html {
    my($pod, $doindex) = @_;

    my $html = $pod;
    $html =~ s/\.(pod|pm)$/.html/g;

    # make sure the destination directories exist; the last component
    # of $html is the file itself
    my @dirs = split("/", $html);
    pop(@dirs);
    my $dir = $htmldir;
    foreach my $part (@dirs) {
        $dir .= "/$part";
        if (! -d $dir) {
            mkdir($dir, 0755) ||
                die "$0: error creating directory $dir: $!\n";
        }
    }

    # invoke pod2html
    print "$podroot/$pod => $htmldir/$html\n" if $verbose;
    my @args = (
        "--htmldir=$htmldir",
        "--htmlroot=$htmlroot",
        "--podpath=".join(":", @podpath),
        "--podroot=$podroot", "--netscape",
        "--header",
        ($doindex ? "--index" : "--noindex"),
        "--" . ($recurse ? "" : "no") . "recurse",
    );
    # only mention the library pods when there are any, instead of
    # passing an empty argument
    push(@args, "--libpods=" . join(":", @libpods)) if @libpods;
    push(@args, "--infile=$podroot/$pod", "--outfile=$htmldir/$html");

    # pod2html is an ordinary subroutine call and does not set $? by
    # itself.  Clear the status first so that one left behind by an
    # earlier system() call is not mistaken for a pod2html failure.
    $? = 0;
    Pod::Html::pod2html(@args);
    die "$0: error running $pod2html: $!\n" if $?;
}
609
# htmlize - hands its arguments to htmlify() with a leading 0 and
#  returns whatever htmlify() returns.
sub htmlize {
    my @text = @_;

    return htmlify(0, @text);
}
Note: See TracBrowser for help on using the repository browser.