#!/usr/local/bin/perl5.8.8 -wT
eval 'exec /usr/local/bin/perl5.8.8 -wT -S $0 ${1+"$@"}'
    if 0;    # not running under some shell
#
# Produce an atom feed of subversion commits.
#
# @(#) $Id: subatom 1264 2006-09-27 15:56:29Z dom $
#

use strict;
use warnings;

use File::Basename qw( basename dirname );
use Getopt::Std qw( getopts );
use XML::Atom::SimpleFeed;

our $VERSION = '0.12';

my $me = basename $0;

sub usage;

# Ensure we know what we're doing for tainting.  Besides pinning PATH,
# drop the variables that perlsec lists as unsafe; if any of them is set
# in the caller's environment, exec and backticks refuse to run under -T.
local $ENV{ PATH } = join ':', qw( /bin /usr/bin /usr/local/bin );
delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) };

my %opt;
getopts( "c:m:o:v:s:", \%opt ) or usage;

# Config-file mode: one feed per stanza, then we are done.
if ( $opt{ c } ) {
    run_one_config( $_ ) foreach read_config( $opt{ c } );
    exit 0;
}

# Command-line mode: build a single config from the flags and arguments.
my $config = {};
$config->{ max_entries } = require_valid( max_entries => $opt{ m } )
    if $opt{ m };
$config->{ output_file } = require_valid( output_file => $opt{ o } )
    if $opt{ o };
$config->{ viewcvs_url } = require_valid( viewcvs_url => $opt{ v } )
    if $opt{ v };
$config->{ self_url } = require_valid( self_url => $opt{ s } )
    if $opt{ s };

my $repos = shift or usage;
$config->{ repos_url } = require_valid( repos_url => $repos );
$config->{ path } = [ map { require_valid( path => $_ ) } @ARGV ];
apply_defaults( $config );
run_one_config( $config );
exit 0;

# Print a usage summary and exit non-zero.
#
# NOTE(review): the text of this here-document was lost when the file was
# extracted.  It has been rebuilt from the getopts() spec above and the
# SYNOPSIS in the POD -- confirm against the released script.
sub usage {
    die <<"USAGE";
usage: $me [-m max] [-o output] [-v viewcvs] [-s self_url] repos_url [path ...]
       $me -c config_file
USAGE
}

# Validate a command-line value with untaint() and die with a clear
# message if it is rejected, rather than carrying on with undef.
# Deliberately NOT named untaint_*: valid_command() treats every
# untaint_* sub as a config-file keyword.
sub require_valid {
    my ( $what, $value ) = @_;

    my $clean = untaint( $what => $value );
    die "$me: invalid $what '$value'\n" unless defined $clean;
    return $clean;
}

# Produce one feed as described by $config (a hashref with the keys
# repos_url, path, max_entries and optionally output_file, viewcvs_url,
# self_url, feed_title).  The feed goes to output_file if set, else to
# STDOUT.
#
# NOTE(review): the first lines of this sub (down to "->as_string") were
# lost in extraction and are rebuilt from the signatures of
# fetch_log_items() and make_feed_from_items() -- confirm.
sub run_one_config {
    my ( $config ) = @_;

    my @items = fetch_log_items( $config );
    if ( !@items ) {

        # Without at least one entry there is no "updated" date to put in
        # the feed; leave any existing output alone.
        warn "$me: no log entries found for $config->{repos_url}\n";
        return;
    }

    my $feed = make_feed_from_items( $config, \@items );
    my $xml  = $feed->as_string;
    if ( $config->{ output_file } ) {
        write_feed_to_file( $xml => $config->{ output_file } );
    }
    else {
        print $xml;
    }
    return;
}

# Run "svn log -v" for the configured repository and paths and return
# the parsed log entries (a list of hashrefs, see process_log_lines).
sub fetch_log_items {
    my ( $config ) = @_;

    # See perlipc(3) for details about all this.
    my $pid = open my $fh, '-|';

    # An undefined pid means the fork failed.  It must not be mistaken
    # for the child (pid 0), or the parent would exec svn itself.
    die "$me: cannot fork: $!\n" unless defined $pid;

    if ( $pid ) {

        # Parent: read what the child writes.
        binmode $fh, ':utf8';
        my @items = process_log_lines( $config, $fh );
        close $fh
            or warn "$me: svn log failed: "
            . ( $! ? "$!" : 'exit status ' . ( $? >> 8 ) ) . "\n";
        return @items;
    }

    # Child: become svn.

    # Ensure that we get subversion to generate times with no
    # timezone.  This means that they match the Atom expectation.
    local $ENV{ TZ } = 'UTC';

    # Try to coerce subversion into spitting out UTF-8.
    local $ENV{ LC_ALL } = 'en_US.UTF-8';

    my @cmd = ( 'svn', 'log', '-v' );

    # Only pass --limit if this svn advertises it in its help text.
    my $help = `svn log --help`;
    push @cmd, '--limit', $config->{ max_entries }
        if defined $help && $help =~ m/--limit/;
    push @cmd, $config->{ repos_url }, @{ $config->{ path } };
    exec @cmd or die "$me: exec(@cmd): $!\n";

    # NOTREACHED
}

# Parse the output of "svn log -v" read from $fh.  Returns a list of at
# most $config->{max_entries} hashrefs with the keys revision, date,
# user, lines, affected (arrayref of "Changed paths" lines) and msg
# (arrayref of message lines).
sub process_log_lines {
    my ( $config, $fh ) = @_;

    my ( @items, $current_item, $in_paths );

    # A named lexical is used instead of $_ so that callers iterating
    # with $_ (e.g. the foreach in the main program) are not clobbered.
    LINE:
    while ( defined( my $line = <$fh> ) ) {
        chomp $line;

        if ( $line =~ m/^-+$/ ) {

            # Separator line: the previous entry (if it had a header) is
            # complete.
            push @items, $current_item
                if $current_item && defined $current_item->{ revision };
            last LINE if @items >= $config->{ max_entries };
            $current_item = {};
            $in_paths     = 0;
        }
        elsif ( !$current_item ) {

            # Anything before the first separator is not part of an entry.
            next LINE;
        }
        elsif ( !defined $current_item->{ revision }
            && $line =~ m/^r\d+ \| / )
        {

            # Header: "r123 | user | date (pretty date) | N lines".  Only
            # the first such line of an entry counts, so a commit message
            # that happens to look like a header is left alone.
            my ( $ver, $user, $date, $lines ) = split / \| /, $line;
            $date  = '' unless defined $date;
            $lines = '' unless defined $lines;
            $ver   =~ s/^r//;
            $date  =~ s/ \(.*\)//;
            $lines =~ s/ .*//;
            $current_item->{ revision } = $ver;
            $current_item->{ date }     = $date;
            $current_item->{ user }     = $user;
            $current_item->{ lines }    = $lines;
        }
        elsif ( !$current_item->{ msg } && $line =~ m/^Changed paths:/ ) {
            $in_paths = 1;
        }
        elsif ( $line eq '' ) {
            if ( $in_paths || !$current_item->{ msg } ) {

                # The blank line between the header/path list and the
                # commit message.
                $in_paths = 0;
            }
            else {

                # A blank line inside the commit message: keep it.
                push @{ $current_item->{ msg } }, $line;
            }
        }
        elsif ( $in_paths ) {
            push @{ $current_item->{ affected } }, $line;
        }
        else {
            push @{ $current_item->{ msg } }, $line;
        }
    }

    # Use up remaining lines.  This lets us avoid a SIGPIPE warning from svn.
    1 while defined( my $unused = <$fh> );

    return @items;
}

# Return the URL an entry should link to: the viewcvs prefix plus the
# revision number if configured, otherwise the first changed path
# appended to the repository URL.
sub item_link {
    my ( $config, $item ) = @_;

    return $config->{ viewcvs_url } . $item->{ revision }
        if $config->{ viewcvs_url };

    # Just link to the first changed file.  A revision may list no
    # changed paths at all; fall back to the repository root then.
    my $first = $item->{ affected } ? $item->{ affected }[0] : undef;
    my $path  = defined $first ? ( split ' ', $first )[1] : undef;
    $path = '' unless defined $path;
    return $config->{ repos_url } . $path;
}

# Add one log entry ($item, as produced by process_log_lines) to $feed.
#
# NOTE(review): every piece of markup in the content strings below was
# stripped when the file was extracted; only the text "Affected files:"
# and the newlines survived.  The <pre>/<p>/<ul> structure is a
# reconstruction -- confirm against the released script.
sub item_to_entry {
    my ( $config, $feed, $item ) = @_;

    # Use the internal writer to do this.  Slightly sneaky.
    my $cdata = \&XML::Atom::SimpleFeed::_cdata;

    my $msg = $cdata->( join( "\n", @{ $item->{ msg } || [] } ) );
    my $summary = $msg;

    my @affected = @{ $item->{ affected } || [] };
    s/^\s+// foreach @affected;    # svn indents the path lines

    $msg = "<pre>$msg</pre>\n";
    $msg .= "<p>Affected files:</p>\n";
    $msg .= "<ul>\n"
        . join( '', map { '<li>' . $cdata->( $_ ) . "</li>\n" } @affected )
        . "</ul>\n";

    $feed->add_entry(
        id      => $config->{ repos_url } . '#r' . $item->{ revision },
        title   => item_title( $item ),
        updated => svn_date_to_atom_date( $item->{ date } ),
        content => $msg,
        summary => $summary,
        author  => $item->{ user },
        link    => item_link( $config, $item ),
    );
    return;
}

# Convert a subversion timestamp into an Atom one.  Always returns
# exactly one scalar, because it is called inside hash constructors.
sub svn_date_to_atom_date {
    my ( $svn_date ) = @_;

    return $svn_date unless defined $svn_date;

    # Change from '2005-08-01 11:07:02 +0000' to
    # '2005-08-01T11:07:02Z'.  Because we specified "TZ" when running
    # svn, we can rely on the time being zulu time.
    $svn_date =~ s/ /T/;
    $svn_date =~ s/ \+\d\d\d\d/Z/;
    return $svn_date;
}

# Build an entry title of the form "r<revision> - <directory>", where
# the directory is that of the shortest changed path.  If the revision
# lists no changed paths the title is just "r<revision>".
sub item_title {
    my ( $item ) = @_;

    my @files =
        sort { length( $a ) <=> length( $b ) }
        grep { defined }
        map { ( split ' ' )[1] } @{ $item->{ affected } || [] };
    return "r$item->{revision}" unless @files;

    my $d = dirname( $files[0] );
    return "r$item->{revision} - $d";
}

# Return the feed title: the configured feed_title if any, otherwise a
# default that names the repository and (unless it is just '/') the
# monitored paths.
sub title_for_feed {
    my ( $config ) = @_;

    return $config->{ feed_title } if $config->{ feed_title };

    my $title = "Recent commits to $config->{repos_url}";
    $title .= " for @{$config->{path}}"
        unless @{ $config->{ path } } == 1 && $config->{ path }[0] eq '/';
    return $title;
}

# Build and return an XML::Atom::SimpleFeed from $items (an arrayref of
# log entries, newest first).  The feed's "updated" date is that of the
# first entry.
sub make_feed_from_items {
    my ( $config, $items ) = @_;

    my %feed_params = (
        id      => $config->{ repos_url },
        title   => title_for_feed( $config ),
        updated => svn_date_to_atom_date( $items->[0]{ date } ),
    );
    $feed_params{ link } = { rel => 'self', href => $config->{ self_url } }
        if $config->{ self_url };
    my $feed = XML::Atom::SimpleFeed->new( %feed_params );
    item_to_entry( $config, $feed, $_ ) foreach @{ $items };
    return $feed;
}

# Write out a feed to the file, taking care to avoid writing unless
# the feed we've just generated is actually newer than the one on
# disk.  This is needed for correct If-Modified-Since support.
#
# $feed is the serialised XML, $file the destination path.  Dies if the
# file cannot be read or written.
sub write_feed_to_file {
    my ( $feed, $file ) = @_;

    if ( -f $file ) {

        # XXX This is quite evil and I should use a proper XML parser.
        # NOTE(review): the element name inside these two patterns was
        # stripped in extraction; "updated" is reconstructed from the
        # variable names and the feed parameter of the same name.
        open my $old_fh, '<', $file or die "$me: open($file): $!\n";
        my $old_feed = do { local $/; <$old_fh> };
        close $old_fh;
        $old_feed = '' unless defined $old_feed;

        my ( $old_updated ) = $old_feed =~ m{<updated>(.*?)</updated>};
        my ( $new_updated ) = $feed     =~ m{<updated>(.*?)</updated>};

        # A missing timestamp sorts before every real one.
        $old_updated = '' unless defined $old_updated;
        $new_updated = '' unless defined $new_updated;
        return if $new_updated le $old_updated;
    }

    open my $fh, '>', $file or die "$me: open(>$file): $!\n";
    print {$fh} $feed or die "$me: write($file): $!\n";

    # Buffered write errors only surface at close time.
    close $fh or die "$me: close($file): $!\n";
    return;
}

# Return true if $cmd is a known config keyword.
sub valid_command {
    my ( $cmd ) = @_;

    # A command exists if it has an untaint routine.
    return 0 unless defined $cmd && $cmd =~ m/\A\w+\z/;
    return __PACKAGE__->can( "untaint_$cmd" ) ? 1 : 0;
}

# The untaint_* routines validate the value in $_ and return the
# accepted (untainted) part of it, or undef if it is not acceptable.
# The match is done in list context so that a failed match yields undef;
# returning $1 directly would leak whatever the caller last captured.
# Patterns without a trailing anchor accept a valid prefix of the value;
# read_config() warns when that shortens it.

sub untaint_max_entries {
    my ( $n ) = m/^(\d+)$/;
    return $n;
}

sub untaint_output_file {
    my ( $file ) = m/^([-\w\/.]+)/;
    return $file;
}

sub untaint_path {
    my ( $path ) = m!^(/[-/\w.\@]+)$!;
    return $path;
}

sub untaint_feed_title {
    my ( $title ) = m/^(.*)$/;
    return $title;
}

sub untaint_repos_url {
    my ( $url ) = m/^((?:https?|file|svn|svn\+ssh):\/\/[-\/\w.\@~]+)/;
    $url =~ s/\/+$// if defined $url;    # Remove trailing slash.
    return $url;
}

sub untaint_self_url {
    my ( $url ) = m/^((?:https?|file):\/\/[-\/\w.\@~?&=]+)/;
    return $url;
}

sub untaint_viewcvs_url {
    my ( $url ) = m/^((?:https?|file):\/\/[-\/\w.\@~?&=]+)/;
    return $url;
}

# Validate $arg as a value for the keyword $cmd.  Always returns exactly
# one scalar: the untainted value, or undef if the keyword is unknown or
# the value is missing or unacceptable.
sub untaint {
    my ( $cmd, $arg ) = @_;

    my $clean;
    if ( defined $cmd && defined $arg && $cmd =~ m/\A(\w+)\z/ ) {

        # Look the validator up by name instead of eval-ing a string.
        my $validator = __PACKAGE__->can( "untaint_$1" );
        if ( $validator ) {
            local $_ = $arg;
            $clean = $validator->();
        }
    }
    return $clean;
}

# Fill in the optional settings of $config in place: 30 entries, and
# the path '/' when no path was given.
sub apply_defaults {
    my ( $config ) = @_;

    $config->{ max_entries } ||= 30;
    $config->{ path }        ||= ['/'];
    $config->{ path } = ['/'] if @{ $config->{ path } } == 0;
    return;
}

# A config-file stanza is usable only if it names both a repository and
# an output file.
sub is_good_config {
    my ( $config ) = @_;

    return $config->{ repos_url } && $config->{ output_file };
}

# Read the config file $file and return a list of config hashrefs, one
# per complete stanza, with defaults applied.  Incomplete stanzas,
# unknown keywords and invalid values are reported with warn and
# skipped.  Dies if the file cannot be opened.
sub read_config {
    my ( $file ) = @_;

    my @todo;
    my $current = {};

    # Finish the stanza collected so far and start a new one.
    my $next_stanza = sub {
        if ( is_good_config( $current ) ) {
            apply_defaults( $current );
            push @todo, $current;
        }
        else {
            warn "$me: $file: ignoring incomplete stanza before line $.\n";
        }
        $current = {};
    };

    open my $fh, '<', $file or die "$me: open($file): $!\n";
    while ( defined( my $line = <$fh> ) ) {
        chomp $line;

        # When you see a blank line, it introduces a new config stanza.
        $next_stanza->() if $line eq '' && %$current;

        # Strip comments.
        $line =~ s/#.*//;

        my ( $cmd, $arg ) = split ' ', $line, 2;
        next unless $cmd;
        if ( !valid_command( $cmd ) ) {
            warn "$me: $file: unknown command '$cmd' on line $.\n";
            next;
        }

        # Trailing blanks (e.g. those left in front of a stripped
        # comment) are not part of the value.
        $arg = '' unless defined $arg;
        $arg =~ s/\s+\z//;

        my $untainted_arg = untaint( $cmd, $arg );
        if ( !defined $untainted_arg ) {
            warn "$me: $file: invalid value '$arg' for '$cmd' on line $.\n";
            next;
        }
        warn "$me: $file: using untainted value '$untainted_arg' on line $.\n"
            if $arg ne $untainted_arg;

        if ( $cmd eq 'path' ) {
            push @{ $current->{ path } }, $untainted_arg;
        }
        else {
            $current->{ $cmd } = $untainted_arg;
        }
    }
    $next_stanza->() if %$current;
    close $fh;

    return @todo;
}

__END__

=pod

=head1 NAME

subatom - produce an atom feed from subversion commits

=head1 SYNOPSIS

  subatom [-m max] [-o output] [-v viewcvs] [-s self_url] REPOSITORY_URL [path ...]
  subatom -c config_file

=head1 DESCRIPTION

B<subatom> is a small script to produce an Atom feed from subversion
commits.  You can use this with a feed reader to see new commits to
your repository.

The first argument is the URL for your subversion repository.  The
remaining arguments are paths within the repository for which you would
like commit messages.  If you don't specify any, it will default to the
entire repository.

The I<-m> flag states how many entries you would like in the feed.  The
default is 30.

If you pass a filename using the I<-o> flag, the output will be written
there instead of STDOUT.  If the filename already exists then the
timestamp will be checked and the new feed will only be written if it
actually contains any newer entries.  If you will be serving the feed
file using a web server, then doing this enables If-Modified-Since to
work correctly (ie: feed readers will only download the file if it has
actually changed).

You can pass an URL using the I<-v> flag, and B<subatom> will create a
link for each commit by appending the revision number to the URL.  For
example a viewcvs URL might look like this (on the command line).

  http://example.com/viewcvs/svn/?view=rev&rev=

If the URL has question marks or ampersands, don't forget to quote it
on the command line.  If you don't, the shell will interpret them and
probably give some odd looking errors.

If you don't pass in a URL, a link will be automatically made to the
first file that changed in this commit.

If you pass an URL using the I<-s> flag, it will be used as the I<self>
link for the feed.  Use this if you would like to avoid warnings from
the feed validator.

=head1 CONFIG FILE

In order to avoid wrapper scripts calling subatom with lots of
parameters, you can also specify a config file.

The format of the file consists of lines containing key-value pairs
separated by whitespace.  A blank line introduces a new stanza.  You
can generate multiple feeds by specifying several stanzas.

The following keywords are recognized.

=over 4

=item I<max_entries>

How many entries should be in the feed?  Defaults to 30.

=item I<output_file>

Where should the feed be written to?  Must be specified.

=item I<repos_url>

The path to the root of the repository.  Must be specified.

B<NOTE>: I<subatom> likes to have the trailing slash removed, if you get
any complaints, try taking it off.

=item I<path>

Specifies a directory in the repository to monitor commit messages for.
Defaults to '/'.

=item I<self_url>

Specifies the URL of the feed that will be produced.  No default.

=item I<viewcvs_url>

Specifies an URL prefix which will have the revision number appended to
produce the link for each commit.  For example, if you have a trac
setup, use something like this:

  viewcvs_url http://dev.rubyonrails.org/changeset/

=item I<feed_title>

Specify the title for the feed as a whole.  Defaults to "Recent commits
to $repos_url".

=back

=head2 EXAMPLE

  output_file /www/svn.atom
  path        /trunk
  repos_url   http://svn.collab.net/repos/svn
  self_url    http://localhost/svn.atom
  viewcvs_url http://svn.collab.net/viewvc/svn?view=rev&revision=
  feed_title  Subversion Changes

  output_file /www/rails.atom
  path        /trunk
  repos_url   http://dev.rubyonrails.org/svn/rails
  self_url    http://localhost/rails.atom
  viewcvs_url http://dev.rubyonrails.org/changeset/

=head1 SEE ALSO

L<svn(1)>,
L<XML::Atom::SimpleFeed>,
L<http://www.ietf.org/rfc/rfc4287.txt>

=head1 AUTHOR

Dominic Mitchell E<lt>cpan (at) happygiraffe.netE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 by Dominic Mitchell. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

=over 4

=item 1.

Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

=item 2.

Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

=back

THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

=cut

# vim: set ai et sw=4 :