package Alexandria::Client; # # adocman - Automation tool for SourceForge.net DocManager handling # # Copyright (C) 2002-2004 Open Source Development Network, Inc. ("OSDN") # Copyright (C) 2004-2005 OSTG, Inc. ("OSTG") # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the license details found # below in the section marked "$LICENSE_TEXT". # # $Id: Client.pm,v 1.4 2005/03/03 14:52:53 n_oostendorp Exp $ # # Written by Jacob Moorman ########################################################################### # Use all strictness, list intended global variables use strict; use vars qw($ORIGINAL_ZERO $LICENSE_TEXT $VERSION $CUSTOMVERSION $USERAGENT); use vars qw(%config $cookie_jar $ua $valid_sfnet_cookie %defconfig); use vars qw(@downloaddirfilelist); # Modify the program name in process listings, i.e. 'ps' (most platforms) #$ORIGINAL_ZERO = $0; #$0 = "adocman"; $LICENSE_TEXT = ' Copyright (c) 2002-2004 Open Source Development Network, Inc. ("OSDN") Copyright (c) 2004 OSTG, Inc. ("OSTG") Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 2. Neither the names of VA Software Corporation, OSDN, OSTG, SourceForge.net, the SourceForge.net Site Documentation project, nor the names of its contributors may be used to endorse or promote products derived from the Software without specific prior written permission of OSTG. 3. The name and trademarks of copyright holders may NOT be used in advertising or publicity pertaining to the Software without specific, written prior permission. Title to copyright in the Software and any associated documentation will at all times remain with copyright holders. 4. If any files are modified, you must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. We recommend that you provide URLs to the location from which the code is derived. 5. Altered versions of the Software must be plainly marked as such, and must not be misrepresented as being the original Software. 6. The origin of the Software must not be misrepresented; you must not claim that you wrote the original Software. If you use the Software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 7. The data files supplied as input to, or produced as output from, the programs of the Software do not automatically fall under the copyright of the Software, but belong to whomever generated them, and may be sold commercially, and may be aggregated with the Software. 8. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE OR DOCUMENTATION. This Software consists of contributions made by OSTG and many individuals on behalf of OSTG. Specific attributions are listed in the accompanying credits file. '; ########################################################################### # Major TODO list (aside from FIXME entries throughout program) # # REQUIRE SERVER-SIDE FUNCTIONALITY # Verify that we are a member of the specified group (--groupid) # Add --list=states once this info is available server-side # Add --validate=type # * verify that unwanted HTML tags are not present in the document # * field length checks # # DO NOT REQUIRE SERVER-SIDE CHANGES # Utility functionality to be incorporated: # * report regarding parameters that are provided, but not used (verbose) # * check permissions on files that could contain private info (loud) # * add the ability to check to see if there is a more recent version # of this tool available (i.e. hit a lookup page on the sitedocs project # web site on SF.net); handle custom versions for this tool (provide # a mechanism to check main version, and custom versions) # Check for cookie expiration # Add --compare=TYPE (all, or field name) # Add --comparestyle with 'boolean', 'diff' and 'cvsversion' options ########################################################################### # Modules use Crypt::SSLeay; # for SSL support (https:// URLs) use Digest::MD5; # for document data comparison use File::stat; # for retrieving file information use HTML::TokeParser; # for parsing of HTML documents use HTTP::Cookies; # for cookie support use HTTP::Request::Common; # for performing FORM submissions use IO::File; # for temp file creation use LWP::UserAgent; # for basic browser functionality use Pod::Usage; # for command-line usage use POSIX qw(strftime); # for date/time handling use Term::ReadKey; # for password collection # Define version and UA string, SF.net host $CUSTOMVERSION = ""; # Enable the next line if this is a custom (modified) version of this script # $CUSTOMVERSION = "-custom-your_text_goes_here"; $VERSION = "0.13". "$CUSTOMVERSION"; $USERAGENT = "adocman/$VERSION (libwww-perl/$LWP::VERSION)"; # Initialize global variables %config = (); # Initialize hash for command-line/config file sub BEGIN { use Exporter (); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @ISA=qw(Exporter); @EXPORT=qw( $ORIGINAL_ZERO $LICENSE_TEXT $VERSION $CUSTOMVERSION $USERAGENT %config $cookie_jar $ua $valid_sfnet_cookie %defconfig adocman_prepare adocman_version adocman_usage util_checkforvariableconflicts util_currenttime util_expandtildes util_extractform util_extracturls util_filewriter util_getselectoptions util_interactive util_mkdir util_newcookiejar util_newuseragent util_output util_outputselectoptions util_parseconfig util_processcommandline util_readdownloaddir util_setdefault util_verifycookie util_verifyvariables verifylogin request ); } sub request { $ua->request(@_) } ########################################################################### # # Sub new # # Creates an Alexandria Client object, and initializes default values # sub new { my $this = {}; bless $this; # Set default values for global variables util_setdefault("early"); # Process command-line options util_processcommandline(""); # Display usage or version information, if requested # (this is done early to cut down on output under --verbose) if (exists($config{help})) { adocman_usage(""); } elsif (exists($config{version})) { adocman_version(""); } # Apply default values for global variables, as needed util_setdefault("middle"); # Read configuration file, if available if (-r util_expandtildes($config{configfile})) { util_parseconfig(""); } # Check for variable conflicts (two variables that should not be set together) util_checkforvariableconflicts(""); if (exists($config{debug})) { exit(0); } # Apply default values for global variables, as needed util_setdefault("late"); # Set the umask umask($config{fileumask}); # Establish the cookie jar util_newcookiejar(""); # Establish the UserAgent util_newuseragent(""); #log the client in if the proper parameters have been #provided if (exists($config{login})) { $this->login(); exit(0); } elsif (exists($config{logout})) { $this->logout(); exit(1); } #at this point verify that we have a valid user $this->verifylogin(); return $this; } ########################################################################### # sfnet_* - Communicate with SF.net servers # Login to the SourceForge.net site sub login { my ($this) = @_; util_verifyvariables("username"); util_output("verbose", "Logging in as $config{username}."); # If our password is not already set if (!defined($config{password})) { if (defined($config{interactive})) { # Retrieve our password interactively util_interactive(""); } else { util_verifyvariables("password"); } } # Clear the cookie jar, in case we are login in as a diff. user $cookie_jar->clear; # Perform the actual login operation my $res = $ua->request (POST $config{hosturl}. "/account/login.php", content => [ 'return_to' => '', 'form_loginname' => $config{username}, 'form_pw' => $config{password}, 'stay_in_ssl' => '1', 'login' => 'Login With SSL', ] ); # Check to see if we failed to get a result from the server unless ($res->is_success or $res->is_redirect) { util_output("verbose", "Failed to login."); util_output("die", $res->as_string(), "2"); } util_output("verbose", "Result returned from login page."); # Verify that we now have a valid login cookie $cookie_jar->scan( \&util_verifycookie ); if ($valid_sfnet_cookie) { util_output("verbose", "Login operation completed ". "successfully."); } else { util_output("die", "Login operation unsuccessful.", "2"); } } # Logout of the SourceForge.net site sub logout { my ($this) = @_; util_output("verbose", "Logging out."); # Perform the actual logout operation my $res = $ua->simple_request (GET $config{hosturl}. "/account/logout.php"); # Ensure that the login cookie is purged $cookie_jar->scan( \&util_verifycookie ); if ($valid_sfnet_cookie) { $cookie_jar->clear; } util_output("verbose", "Logout operation completed."); } ########################################################################### # adocman_* - Perform a core operation of adocman # Postprocess the data from STDIN, output DocManager-ready data to STDOUT sub adocman_prepare { # Undefine delimiters, so we can slurp all available data undef $/; my $html = ; # Strip document header and footer, if they exist $html =~ s/(.*)(\)(.*)/$5/sm; $html =~ s/(\<\/body>)(.*)//sm; # Convert SourceForge.net URLs to be absolute local my @acceptable_URI_types = qw(http https); my $uritype; foreach $uritype (@acceptable_URI_types) { $html =~ s/href\=\"$uritype:\/\/$config{hostname}\//href\=\"\//ism; } # Replace text with HTML entities (we are concened with &, > and <) $html =~ s/&(?!(?:[a-zA-Z0-9]+|#\d+);)/&/g; # FIXME handling for > and < # Trim whitespace from the beginning and end of our document $html =~ s/^\s+//; $html =~ s/\s+$//; print "$html\n"; END { close(STDOUT) or util_output("die", "Unable to close ". "STDOUT: $!\n", "1"); } } # Output program usage information and exit sub adocman_usage { my $exit_val=0; my $verbose=0; if (exists($config{verbose})) { $verbose=1; } if (exists($_[0])) { if ($_[0] eq "error") { $exit_val=1; } } pod2usage(-exitval => $exit_val, -verbose => $verbose); exit($exit_val); } # Output program version information and exit sub adocman_version { print "adocman version $VERSION\n"; if (exists($config{verbose})) { print $LICENSE_TEXT. "\n"; } exit (0); } ########################################################################### # util_* - Perform a utility function of adocman # Check to ensure that we have no variable conflicts; i.e. settings which # may not be combined sub util_checkforvariableconflicts { # Verify that only one of filename OR directoryname is set if (exists($config{filename}) && exists($config{directoryname})) { util_output("die", "Either filename OR directoryname ". "may be set at one time.", "4"); } # Verify that only one action has been requested my @validactions = qw(login logout createnew modify download docgroup list help version); my $actioncount = 0; my $action; foreach $action (@validactions) { if (defined($config{$action})) { $actioncount++; } } if ($actioncount > 1) { util_output("die", "Multiple actions specified; ". "only one may be specified at a time.", "4"); } elsif ($actioncount < 0) { util_output("die", "No actions specified; ". "one must be specified.", "4"); } # Verify that our filter is not being combined with --verbose if (defined($config{prepare}) && defined($config{verbose})) { util_output("die", "Use of --verbose with --prepare ". "is prohibited (may damage output).", "4"); } # Verify that we're not pulling a password both from # config/command-line, and using --interactive if (defined($config{interactive}) && defined($config{password})) { util_output("die", "Use of --password with ". "--interactive is prohibited.", "4"); } } # Return a date/time stamp (current GMT) in format YYYY-MM-DD HH:MM:SS sub util_currenttime { my $timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime()); return $timestamp; } # Expand tildes within a path; should be expanded to home directory for user sub util_expandtildes { # from Perl Cookbook, First Edition, Section 7.3 my $path = $_[0]; $path =~ s{ ^ ~ ( [^/]* ) } { $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($>))[7] ) }ex; return $path; } # Extract a named form from a HTML document sub util_extractform { # FIXME: should have a check to make sure we have form content # that is requested my $html = $_[0]; my $formname = $_[1]; util_output("verbose", "Extracting form from HTML document."); # Remove the first part of the page, up to
$html =~ s/(\<\/form>)(.*)/$1/sm; return $html; } # Extract URLs from a HTML document sub util_extracturls { my $html = $_[0]; util_output("verbose", "Extracting URLs from HTML document."); my $parser = HTML::TokeParser->new(\$html); my %seen = (); while (my $token = $parser->get_tag("a")) { my $url = $token->[1]{href} || "-"; my $text = $parser->get_trimmed_text("/a"); $seen{$url} = $text; } return %seen; } # Open specified file, write a string to the file, then close the file sub util_filewriter { my $filename = util_expandtildes($_[0]); my $data = $_[1]; util_output("verbose", "Writing output to: $filename"); unless (open(OUTPUTFILE, ">$filename")) { util_output("die", "Unable to open file for writing: ". "$filename", "5"); } print OUTPUTFILE $data; close (OUTPUTFILE); } # Return an hash of names, values for HTML select options sub util_getselectoptions { my $html = $_[0]; my $parser = HTML::TokeParser->new(\$html); my $desired_select = $_[1]; my %result = (); util_output("verbose", "Parsing HTML as to derive select ". "options."); # Set this to 1 when we have located the desired select (i.e. when # it is time to start printing options. my $located_desired_select = 0; # Set this to 1 when we have finished with the options in that # select, so as that we don't print options from subsequent selects. my $finished_with_desired_select = 0; # While we still have tokens and have not yet finished with our select # i.e. we haven't hit another select while ((my $token = $parser->get_token()) && ($finished_with_desired_select != 1)) { # Check to see if the next token is a start tag my $type = $token->[0]; if ($type eq "S") { my $tag = $token->[1]; # If this is an option, we are in the right select, add # information about this option if (($tag eq "option") && ($located_desired_select == 1) && ($finished_with_desired_select != 1)) { my $value = $token->[2]{value}; my $label = $parser->get_trimmed_text; $result{$value} = $label; } elsif ($tag eq "select") { my $name = $token->[2]{name}; # If we've hit the right select, set flag if ($name eq $desired_select) { $located_desired_select = 1; } elsif ($located_desired_select) { # If we've hit a select, we're done $finished_with_desired_select = 1; } } } } return %result; } # Retrieve a password interactively sub util_interactive { my $max_wait = 120; # Wait up to 120 seconds for the password ReadMode('noecho'); util_output("normal", "Input will not echo to screen."); util_output("normal", "Waiting up to $max_wait seconds for ". "user input."); print "Enter your password: \n"; $config{password} = ReadLine(120); chomp $config{password}; ReadMode('normal'); if (!defined($config{password})) { util_output("die", "Timeout reached during wait ". "for password entry.", "6"); } } # Make a new directory sub util_mkdir { my $directoryname = util_expandtildes($_[0]); util_output("verbose", "Creating directory: $directoryname"); mkdir($directoryname, $config{dirmode}); if (! -d $directoryname) { util_output("die", "Directory may not be created: ". "$directoryname", "5"); } } # Establish a new cookie_jar instance sub util_newcookiejar { util_output("verbose", "Creating our cookie jar."); # FIXME: Should check on the permissions of the cookiefile $valid_sfnet_cookie = 0; # Create a new cookie_jar instance, saving cookies that we # recieve, and ignoring requests to discard cookies after the # session is over (i.e. so we don't need to login for each # operation. $cookie_jar = HTTP::Cookies->new( file => util_expandtildes($config{cookiefile}), autosave => 1, ignore_discard => 1); } # Establish a new UserAgent instance sub util_newuseragent { util_output("verbose", "Creating our UserAgent."); $ua = LWP::UserAgent->new (agent => $USERAGENT, cookie_jar => $cookie_jar, requests_redirectable => [qw(GET HEAD POST)], ); } # Output (die, loud, verbose, normal, quiet) sub util_output { # Accepts three parameters: type of output, output text, error code # Output type: "die" - output to stderr, exit with "error code" # Output type: "loud" - output to stderr, !quiet # Output type: "verbose" - output if option "verbose" enabled, !quiet # Output type: "quiet" - output if option "quiet" is enabled # Output type: "normal" - output if !quiet if ($_[0] eq "die") { print STDERR $_[1]. "\n"; exit($_[2]); } elsif ($_[0] eq "loud") { if (!exists($config{quiet})) { print $_[1]. "\n"; } } elsif ($_[0] eq "verbose") { # If this line starts with a dash, don't output a timestamp if ($_[1] =~ /^\-/) { if (exists($config{verbose})) { print $_[1]. "\n"; } } else { my $currenttime = util_currenttime(""); if (exists($config{verbose})) { print $currenttime. " ". $_[1]. "\n"; } } } elsif ($_[0] eq "quiet") { print $_[1]. "\n"; } else { # Default to normal if (!exists($config{quiet})) { print $_[1]. "\n"; } } } # Output the options from a specific