\n"; }
$need_p = 0;
}
elsif ($paragraph =~ /^=back\s*$/) { # End of list?
print HTML $end_section;
$need_p = 0;
}
elsif ($paragraph =~ /^=/) { # Something we don't recognize?
die "$0: unrecognized directive $paragraph";
}
else { # Just a regular text section?
$need_p and print HTML "\n"; # Put paragraph delimeter.
print HTML format_paragraph($paragraph, $link_targets);
$need_p = 1;
}
}
}
#
# Set up the HTML links for a given name:
#
sub link_name {
my $target_name = $_[0];
my $ret_str = '';
if ($target_name =~ s/\s*X<(.*?)>\s*//) { # Index entry?
$ret_str = qq[];
}
$ret_str .= qq[];
if ($target_name =~ /^C\<(\S+?)\>/) { # A program identifier word?
$ret_str .= qq[];
}
return $ret_str;
}
#
# Return a URL fragment (the part after the #) for a given string.
#
sub url_fragment {
my $str = $_[0];
$str =~ s/\s+/ /g; # Convert multiple spaces to a single space.
$str =~ s/^\s//; # Strip leading whitespace.
$str =~ s/\s$//; # Strip trailing whitespace.
$str =~ s{(\W)}{sprintf("%%%02x", ord($1))}eg; # Protect special characters.
return $str;
}
#
# Format a paragraph which is supposed to be straight text.
# Returns the string (doesn't print it to the HTML filehandle).
# Arguments:
# a) The text string.
# b) The hash of valid links.
#
sub format_paragraph {
local $_ = $_[0]; # Access the text.
my $link_targets = $_[1]; # Access hash of valid targets.
if (/^[ \t]/) { # Indented text? Treat as verbatim.
s/\\</g; # Protect some special symbols.
s/\>/\>/g;
return "
\n" . insert_crosslinks($_, $link_targets) . "
";
}
#
# It's not verbatim text. Parse it apart, handling the various different
# kinds of attributes:
# B
# C
# E
# F
# I
# L
# L
# L
# S
# X
# Z<>
# Also each of these has a << >> counterpart as well.
#
s/\s+$//; # Strip trailing whitespace.
my @strings = (''); # The strings available on each level of
# angle bracket nesting. $strings[0] will
# be the final text.
my @pod_directive = (''); # What character began this angle bracket
# expression.
pos($_) = 0;
while (pos($_) < length($_)) {
if (!defined(pos($_))) {
warn "Something's wrong here!\n";
}
if (/\G([BCEFILSXZ])(<<+)\s+/gc) { # Multiple angle brackets?
my $open_char = $1;
my $opening_str = "$1$2";
my $closing_string = ">" x length($2);
# Get the closing string.
if (/\G(.*?)\s+$closing_string/gc) { # Find the matching closing string.
$strings[-1] .= format_pod_directive($open_char, $1, $link_targets);
# Do something with it.
next;
} else {
warn "Couldn't find match to $opening_str\n";
}
}
elsif (/\G([BCEFILSXZ])/gc) { # Closing angle bracket?
if (@strings > 1) { # Anything on the stack?
my $directive_text = pop @strings;
$strings[-1] .= format_pod_directive(pop @pod_directive, $directive_text, $link_targets);
# Format that text.
}
}
elsif (/\G([^BCEFILSXZ<>]+)/gc) { # Chars we don't care about?
my $text = $1;
if (@strings == 1) { # Not in a pod directive?
$text =~ s/\s\"/ &\#147;/g; # Try to make opening and closing quotes
$text =~ s/\"([\W])/&\#148;$1/g; # look nice.
}
$strings[-1] .= $text;
}
elsif (/\G([BCEFILSXZ])/gc){ # Must be one of these, not followed by .
$strings[-1] .= $1;
}
elsif (/\G\/gc) {
$strings[-1] .= ">"
}
else {
die "How did I get here?";
}
}
return $strings[0];
}
#
# Format some text according to a particular pod directive character.
# Arguments:
# a) The directive char (B, C, E, F, I, L, S, X, or Z).
# b) The text.
# c) The hash of valid HTML link targets
#
# Returns the HTML string corresponding to that format.
#
sub format_pod_directive {
my ($dir_char, $text, $link_targets) = @_;
if ($dir_char eq 'B') {
return "$text";
}
elsif ($dir_char eq 'C') {
return "" . insert_crosslinks($text, $link_targets) . "
";
# Try to find linkable items if it's marked
# as code.
}
elsif ($dir_char eq 'E') {
$text eq 'verbar' and return "|";
$text eq 'sol' and return '/';
if ($text =~ /^(?:0x[0-9A-Fa-f]+|\d+)$/) { # Numeric HTML code?
$text =~ /^0/ and return "" . oct($text) . ";";
return "$text;";
}
return "&$text;"; # HTML formatting code.
}
elsif ($dir_char eq 'F') {
return "$text";
}
elsif ($dir_char eq 'I') {
return "$text";
}
elsif ($dir_char eq 'L') { # Direct link?
if ($text =~ /^(\w+):/) { # Absolute URL?
return "$text";
}
my $displayed_text = $text;
if ($text =~ /^([^\|]+)\|(.*)$/s) { # L?
$displayed_text = $1;
$text = $2; # $text is now what to link to.
}
if ($text =~ m@^(\w*)/(.*)$@s) { # Section within another page?
my $fname = $1;
my $section = $2;
$section =~ s/\"//g; # Strip out any quotes.
$section =~ s/\s+/ /g; # Convert multiple spaces to single spaces.
$section =~ s/^\s//; # Strip leading whitespace.
$section =~ s/\s$//; # Strip trailing whitespace.
if ($fname =~ /^\s*$/) { # No filename specified?
$fname = $podfile; # Use the filename of the current file,
$fname =~ s/\.[^\.]+$//; # with the extension stripped off.
}
if ($link_targets->{$section}{"$fname.pod"} ||
$link_targets->{$section}{"$fname.pm"}) { # Recognized target?
my $url_fragment = url_fragment($section);
return "$displayed_text";
}
else {
warn "Unrecognized link target '$text'\n";
return $displayed_text;
}
}
else { # Refer to the whole page?
-f "$text.html" || -f "$text.pod" or warn "Unrecognized link target '$text'\n";
return "$displayed_text";
}
}
elsif ($dir_char eq 'S') { # Non-breaking text?
$text =~ s/\s/ /g;
return $text;
}
elsif ($dir_char eq 'X') { # Index entry?
return qq[];
}
elsif ($dir_char eq 'Z') { # Null formatting code?
return '';
}
else {
die "Invalid code $dir_char"; # Should never get here.
}
}
#
# See if we can insert any cross-links into some text. Looks for strings
# which might be in the index of available things.
#
# Right now we only recognize whole words.
# Arguments:
# a) The text to format.
# b) The hash of valid link targets.
#
sub insert_crosslinks {
my ($text, $link_targets) = @_;
$text =~ s{([\#\$\@\%]?[\w:]+)}{
if ($link_targets->{$1}) { # Any possibility of linking?
my $word = $1;
if (ref($link_targets->{$word}) eq '') { # Not a hash, just an absolute?
qq[$word];
}
elsif (!defined($link_targets{$word}{$podfile})) { # Not known from this file?
my (@podfiles) = keys %{$link_targets{$word}}; # See how many other files
# we could conceivably link to.
if (@podfiles == 1) { # Only one candidate?
my $htmlfile = $podfiles[0];
$htmlfile =~ s/\.[^.]+$/.html/; # Get the correct filename.
$htmlfile =~ s@^.*/@@; # Strip out directory info.
qq[] . $word . "";
}
else {
$word; # Multiple candidates, don't try to link.
}
} else { # Link to somewhere in this file:
qq[$word];
}
} else {
$1;
}
}eg;
return $text;
}
#
# Read a pod file and return a hierarchical set of lists that describes
# the structure of the file.
# Argument: The name of the file.
#
# Returns a reference to an array of arrays, which is formatted like this:
#
# [ ["=head1 NAME", "perl - Practical Extraction and Report Language"],
# ["=head1 SYNOPSIS", "..."],
# ["=head1 DESCRIPTION", "...",
# ["=over 4", ["=item *", ...], ["=item *", ...], "=back"] ] ];
#
sub parse_pod_sections {
my $fname = $_[0];
local *POD;
open(POD, $fname) || die "$0: can't read $fname--$!\n";
my @level_stack; # Where we build up the parsed file.
local $_;
my $in_pod = 0;
while (defined($_ = get_paragraph(\*POD))) {
if (/^=/) { # Beginning of pod section?
$in_pod = 1;
}
next unless $in_pod; # Skip over any perl code.
if (/^=head(\d)/) { # Heading of some level?
my $new_level = $1;
while (@level_stack > $new_level) { # At too high a level now?
my $last_level = pop @level_stack;
push @{$level_stack[-1]}, $last_level;
}
while (@level_stack < $new_level) { # At too low a level?
push @level_stack, []; # Add a new level.
}
}
elsif (/^=over/) { # Indent some more?
push @level_stack, []; # Add a new level.
}
if (/^[ \t]/) { # Try to coalesce separate verbatim sections:
if ($level_stack[-1][-1] =~ /^[ \t]/) {
$level_stack[-1][-1] .= $_;
next;
}
}
push @{$level_stack[-1]}, $_; # Save this text.
if (/^=back/) { # Done with indent?
$level_stack[-1][0] =~ /^=over/ or
die "$0: misplaced =back directive in $fname\n";
my $last_level = pop @level_stack;
push @{$level_stack[-1]}, $last_level;
# Go up one level.
}
}
close POD;
while (@level_stack > 1) { # Do final cleanup:
my $last_level = pop @level_stack;
push @{$level_stack[-1]}, $last_level;
}
return $level_stack[0];
}
#
# Scan a single pod file, looking for all possible link targets, and extracting
# other useful information. A link target
# is anything after an =head or =item directive in the pod file.
# Arguments:
# a) The name of the file to scan.
# b) The array to store the link targets in. This is a 2D associative array
# which is indexed like this:
# $link_targets{$target_name}{$podfile_name} = 1;
# Returns a list consisting of the following information:
# 0: The title of the file (from the NAME section).
# 1: Whether there are any level 1 sections beyond NAME and DESCRIPTION and
# AUTHOR.
#
sub prescan {
my ($fname, $link_targets) = @_;
local *FH; # Make a local file handle.
open(FH, $fname) || die "$0: can't read $fname--$!\n";
local $_;
my $in_pod = 0;
my $in_NAME = 0;
my $title;
my $other_top_level_sections = 0;
while (defined($_ = get_paragraph(\*FH))) { # Read another paragraph?
if (/^=/) { $in_pod = 1; $in_NAME = 0; } # Found a pod section?
if ($in_pod) {
if (s/X<(.*?)>//) { # Index entry specification?
$link_targets->{$1}{$fname} = 1; # Put into the index.
}
if (/^=head1\s+(.*?)\s*$/) {
if ($1 eq 'NAME') { # Top-level NAME section?
$in_NAME = 1; # Remember to snag the name.
next;
}
elsif ($1 ne 'DESCRIPTION' && $1 ne 'AUTHOR') {
$other_top_level_sections = 1;
}
}
elsif (/^=(?:item|head[2-9])\s+(.*)$/s) { # A target of a link?
my $target_name = $1; # Canonicalize the target name:
next if length($target_name) <= 1; # Do not link to single characters.
$target_name =~ s/\s+/ /; # Convert whitespace into single spaces.
$target_name =~ s/^\s//; # Strip leading whitespace.
$target_name =~ s/\s+$//; # Strip trailing whitespace.
$link_targets->{$target_name}{$fname} = 1;
# Remember that this target exists.
} elsif (/^=cut/) { # Leaving a pod section?
$in_pod = 0;
$in_NAME = 0;
}
elsif (! /^=/) { # Not a pod directive?
if ($in_NAME) { # Looking for the title?
$title = $_; # Store the title for later.
}
}
}
}
close FH;
if (!$other_top_level_sections) { # If this isn't a real man page, but just
# a continuation page (e.g., like perlvar
# instead of perl), we might want to strip the
# man page name from the title.
if ($title =~ /^\s*(\w+)\s*-+\s*(.*)$/s) { # Follows typical format?
if (lc($1) eq lc(substr($fname, 0, length($1)))) {
$title = $2; # Ignore the duplicated part of the file name.
}
}
}
$title =~ s/^\s+//; # Strip leading whitespace.
$title =~ s/\s+$//; # Strip trailng whitespace.
return ($title, $other_top_level_sections);
}
#
# Return a paragraph from a file.
# We don't simply set $/ = '' because then if there is extra trailing
# whitespace on the blank line between paragraphs, it doesn't see the
# paragraph break.
#
sub get_paragraph {
my $fh = $_[0];
my $str;
my $line;
while (defined($line = <$fh>)) {
$str .= $line;
last if $line =~ /^\s*$/; # Blank line.
}
return $str;
}
pod2html(@ARGV);
=head1 AUTHOR
Gary Holt (holt-makepp@gholt.net)
29 June 2003
=cut