#!/usr/bin/perl -w

=head1 NAME

pod2html -- replacement for buggy pod2html that comes with the perl distribution

=head1 SYNOPSIS

    pod2html *.pod

=head1 DESCRIPTION

I was sadly disappointed with the F<pod2html> program that came with the perl
distribution.  (Try running it on the makepp pod files and compare the output
with the output from this program!  It's full of bugs.)  This program fixes
the bugs that I know of, and includes the following refinements:

=over 4

=item *

Correctly handles links with special characters in them.

=item *

Correctly formats lists like

    =over 4

    =item *

    This is an item

    =back

=item *

Finds a lot more candidate links, because it analyzes a group of related pod
files all at once.

=back

This is a simple replacement that is designed to handle linking between
different related pod files better.

=cut

=head2 pod2html

Parse a group of related pod files.  For each F<.pod> file, writes out a
corresponding F<.html> file in the output directory given as the first
argument (the directory is created if it does not exist).

    pod2html("html_output_dir", glob("*.pod"));

=cut

sub pod2html {
    my $html_dir = shift @_;

    # Create the output directory on demand; give up if that fails.
    unless (-d $html_dir) {
        mkdir($html_dir, 0777)
            or die "$0: can't write to output directory $html_dir--$!\n";
    }

    #
    # First scan all the .pod files, looking for anything which we can link to.
    # This will accumulate a list of possible targets in the hash %link_targets.
    #
    # %link_targets is deliberately a dynamically scoped package variable
    # (local, not my).  NOTE(review): presumably so that routines called from
    # here can reach it without it being passed -- confirm before changing.
    #
    local %link_targets;
    my (%title, %has_other_sections);
    foreach my $file (@_) {
        ($title{$file}, $has_other_sections{$file}) =
            prescan($file, \%link_targets);
        if ($file =~ /([^\/]+)\.[^\.\/]+$/) {
            # Put in a link target for each .pod file, keyed by its base name.
            $link_targets{$1} = "$1.html";
        }
    }

    #
    # Now parse each file and convert it to HTML:
    #
    foreach my $file (@_) {
        my $html_file = $file;
        $html_file =~ s@^.*/@@;                # Strip off the directory info.
        $html_file =~ s/\.[^\.]+$/\.html/;     # Change the suffix.
        $html_file = "$html_dir/$html_file";   # Put it in the correct directory.
        convert_pod_file($file, $html_file, $title{$file},
                         $has_other_sections{$file},
                         \%link_targets);      # Convert this file.
    }
}

#
# Convert a pod file into HTML.
#
# Arguments:
# a) The pod file.
# b) The HTML output file.
# c) The title to put in the HTML output.
# d) Whether to make the =head1 sections separate HTML sections or not.
#    If there's only a DESCRIPTION, a NAME, and an AUTHOR, then there's no
#    need to do so.
# e) The %link_targets hash (passed by reference).
#
# Dies if the output file cannot be opened or cannot be completely written.
# Output goes through the package file handle HTML, which is localized here
# and written to by add_table_of_contents() and format_to_html().
#
sub convert_pod_file {
    # Dynamically scoped on purpose.  NOTE(review): presumably read by
    # helper routines (e.g. for diagnostics) -- confirm before making it
    # lexical.
    local $podfile = shift @_;
    my ($htmlfile, $html_title, $head1_sections, $link_targets) = @_;

    my $pod_sections = parse_pod_sections($podfile);

    local *HTML;                # Make local file handles.
    # Three-argument open: the file name is taken literally, so characters
    # such as leading '>' or trailing '|' in it cannot change the open mode.
    open(HTML, '>', $htmlfile)
        or die "$0: can't open $htmlfile--$!\n";

    # NOTE(review): the literals printed in this routine contain no markup at
    # all; they look as if their HTML tags were lost somewhere upstream.
    # Their values are reproduced unchanged here -- confirm the intended page
    # skeleton and restore it.
    print HTML " $html_title \n";
    if (!$head1_sections) {
        # If we won't put a NAME section in, add a title now.
        print HTML "\n\n$html_title\n\n\n";
        # Get rid of the sections we won't be writing out.
        $pod_sections = remove_head1($pod_sections);
    }

    add_table_of_contents($pod_sections);   # Put an index at the beginning.
    print HTML "\n" unless $head1_sections;

    format_to_html($pod_sections, 1, $link_targets);   # Write each section.

    print HTML "\n";
    # Buffered write errors (disk full, ...) only surface at close time.
    close(HTML)
        or die "$0: error writing $htmlfile--$!\n";
}

#
# Remove the =head1 sections since we aren't going to be using them.
# Arguments:
# a) The list (reference) returned from parse_pod_sections.
#
# Returns a reference to a new list holding only what lies inside the
# DESCRIPTION section.  The "=head1 DESCRIPTION" paragraph itself is kept as
# an empty-string placeholder; nested list references inside the section are
# kept as they are.  The list passed in is not modified.
#
sub remove_head1 {
    my $paragraphs = $_[0];
    my @ret_paragraphs;

    #
    # Remove everything but the DESCRIPTION section:
    #
    my $in_description = 0;
    foreach my $para (@$paragraphs) {
        # Work on a copy: $para aliases the caller's element, so assigning
        # to it would overwrite the caller's list.
        my $kept = $para;
        if (!ref($para) && $para =~ /^=head1\s+(\w+)/) {
            $in_description = $1 eq 'DESCRIPTION';
            $kept = '';         # Suppress this field.
        }
        push @ret_paragraphs, $kept if $in_description;
    }

    return \@ret_paragraphs;
}

#
# Add a table of contents to the HTML file.
# Arguments:
# a) The list (reference) of paragraphs at this level; nested list
#    references are descended into recursively.
#
# Writes to the global file handle HTML.
#
sub add_table_of_contents {
    my $paragraphs = $_[0];
    my $printed_anything = 0;

    foreach my $para (@$paragraphs) {
        if (ref($para)) {       # Nested structure?
            add_table_of_contents($para);
        }
        elsif ($para =~ /^=head.\s+(.*?)\s*$/) {   # New section?
            # NOTE(review): nothing in this routine sets $printed_anything
            # or uses the captured heading text, and the printed literal is
            # empty, so no entry is emitted.  The statements that wrote the
            # entries appear to be missing -- TODO restore them.
            print HTML "" if $printed_anything;
        }
    }
}

#
# Format one level of the output from parse_pod_sections().  Arguments:
# a) A reference to a list of things at this level.
# b) The number of the level we're at.
# c) Whether =head1 sections go into

sections or not. # d) The hash of valid link targets. # # Writes to the global file handle HTML. # sub format_to_html { my ($paragraphs, $level, $link_targets) = @_; my $end_section = ''; my $need_p = 0; for (my $paridx = 0; $paridx < @$paragraphs; ++$paridx) { my $paragraph = $paragraphs->[$paridx]; if (ref($paragraph)) { # Nested level here? format_to_html($paragraph, $level+1, $link_targets); } elsif ($paragraph =~ /^=head(.)\s+(.*?)\s*$/s) { # Heading line? my $level = $1; my $text = $2; if ($level == 1) { # Special handling for head1 sections: if ($text eq 'AUTHOR') { print "

"; next; } } print HTML "

" if $level == 1; # Make it look more dramatic. print HTML "", link_name($text), format_paragraph($text, $link_targets), "\n"; } elsif ($paragraph =~ /^=over\s+(\d+)\s*$/) { # Some sort of list? # # We have to figure out what kind of list it is. If all the items are # =item *, then we should use a