#!/usr/local/bin/perl5
#
# 1. with 'egrep dir/each-art >> log'  41491940
# 1326.470u 863.656s 1:33:06.41 39.2% 0+0k 242384+103io 10pf+0w
#
# 2. with 'egrep dir/* >> log'         44084303
# 1241.241u 402.263s 1:20:10.11 34.1% 0+0k 232356+39io 54pf+0w
#
#
# ABSOLUTELY NO WARRANTY WITH THIS PACKAGE. USE IT AT YOUR OWN RISK.
#
# Parse DIABLO news spool and generate raw Path: info for inpaths.
# URL: ftp://ftp.ecrc.de/pub/news/servers/diablo-utils
#
# diablo-inpaths.pl v1.0  970824  Iain Lea  iain@ecrc.de
#
# Usage: diablo-inpaths.pl [options]
#     -h       help
#     -d dir   diablo news spool dir (default: /news/spool/news)
#     -l file  log to file (default: /var/log/news/path.log)
#     -p host  hostname in Path: header (default: newsfeed.ecrc.net)
#     -v       verbose
#     -V       debug
#
# Logic:
# ------
#    +---------+--------------+--------------+
#    |Oldest   | LowWater     | HighWater    | Current
#    v         v              v              v
#    /    /    /    /    /    /    /    /    / Created new dir
#   ...  -70  -60  -50  -40  -30  -20  -10   0 Time in minutes
#
# This program uses a couple of tricks to gain speed over a normal
# 'find /spool -exec grep ...' script (note that such a find script
# would log some Path: headers twice due to diablo still appending
# articles to its latest spool files).
# 1. diablo creates a new dir in /news/spool/news for its large 
#    multi-article files every 10 minutes to stop big dirs.
# 2. the multi-article files are only changing size in the new dir.
# 3. older dirs have multi-article files that are only being read
#    from by dnewslink processes and are therefore stable & inert.
# This script uses the above mentioned points todo the following:
# - 1st time its run it logs all Path: headers from Oldest -> HighWater.
# - logs HighWater time to $SpoolDir/.diablo-inpaths.info for next run.
# - when run again it reads the $SpoolDir/.diablo-inpaths.info file and
#   sets the LowWater mark.
# - sets HighWater mark to current time - 30 minutes.
# - logs all Path: headers between LowWater -> HighWater marks.
# - logs HighWater time to $SpoolDir/.diablo-inpaths.info for next run.
# - etc. etc.
#
# TODO:
# -----
#
# Acknowledgements:
# -----------------
#   Jeff Garzik             jeff.garzik@spinne.com
#
# ChangeLog:
# ----------
# v1.0BETA
#   - 

require 'getopts.pl';
require "timelocal.pl";

$PathLogFile = "/var/log/news/path.log";
$SpoolDir = "/news/spool/news";
$FtpUrl = "ftp://ftp.ecrc.de/pub/news/servers/diablo-utils/";
chop ($PathName = `hostname`);
$Verbose = 0;
$Debug = 0;
$Version = 'v1.0';
$ScriptName = 'diablo-inpaths';
$SpoolInfoOk = 0;
$CurrTime = time;
$HighWater = $CurrTime - (60 * 30);	# 30 mins ago
$LowWater = 1;	# a long time ago :)

##############################################################################
# 
#

print "Set LowWater=$LowWater HighWater=$HighWater\n" if $Debug;

&ParseCmdLine ($0);

&ReadSpoolInfo;

&ParseSpoolDir;

&WriteSpoolInfo;

exit 0;

##############################################################################
# 
#

sub ParseCmdLine
{
	my ($ProgName) = @_;
	
	&Getopts('d:hl:p:vV');

	if ($opt_h) { 
		print <<EOT
$ScriptName $Version  $FtpUrl

Create inpaths Path: propagation data for DIABLO news relay server.
Copyright 1997 Iain Lea (iain\@ecrc.de). NOTE: Use at your own risk.

Usage: $ProgName [options]
       -h       help
       -d dir   diablo news spool dir (default: $SpoolDir)
       -l file  log to file (default: $PathLogFile)
       -p host  hostname in Path: header (default: $PathName)
       -v       verbose
       -V       debug

EOT
;
		exit 1;
	}
	$SpoolDir = $opt_d if (defined($opt_d));
	$PathLogFile = $opt_l if (defined($opt_l));
	$PathName = $opt_p if (defined($opt_p));
	$Verbose++ if (defined($opt_v));
	$Debug++ if (defined($opt_V));

	$SpoolInfoFile = "$SpoolDir/.diablo-inpaths.info";
}


sub ReadSpoolInfo
{
	print "Reading $SpoolInfoFile ...\n" if $Verbose;

	if (open (FILE, $SpoolInfoFile)) {
		while (<FILE>) {
			if (/^(\d+)$/) {
				$LowWater = $1;
				print "Set LowWater=$LowWater HighWater=$HighWater\n" if $Debug;
			}
		}
		close (FILE);
	}
}


sub WriteSpoolInfo
{
	print "Writing $SpoolInfoFile HighWater=$HighWater...\n" if $Verbose;

	if (open (FILE, "> $SpoolInfoFile")) {
		print FILE "$HighWater\n";
		close (FILE);
	} else {
		print "Error: $SpoolInfoFile - $!\n";
	}
}


sub ParseSpoolDir
{
	my ($Dir);

	print "Parsing $SpoolDir $LowWater (low) <--> $HighWater (high) ...\n" if $Verbose;

	chdir ($SpoolDir) || die "Error: $SpoolDir - $!\n";

	open (PIPE, "find . -type d -name '[A-z0-9]*' -print |") || die "Error: $SpoolDir - $!\n";

	while ($Dir = <PIPE>) 
	{
		chop $Dir;

		@Stat = stat ($Dir);
		$Ctime = $Stat[10];

		if ($Ctime > $LowWater && $Ctime < $HighWater) {
			print "DIR:  $Dir  $Ctime > $LowWater && $Ctime < $HighWater\n" if $Debug;
			&ParseDir ($Dir);
		} elsif ($Ctime <= $LowWater) {
			print "OLD:  $Dir  $Ctime =< $LowWater\n" if $Debug;
		} elsif ($Ctime >= $HighWater) {
			print "NEW:  $Dir  $Ctime >= $HighWater\n" if $Debug;
		}
	}
	close (PIPE);
}


sub ParseDir
{
	my ($Dir, $Secs) = @_;
	my (@FileList, $File);

	print "GREP $Dir/*\n";
	chdir ($Dir);
	`egrep -h "^Path: $PathName" * >> $PathLogFile`;
	chdir ($SpoolDir);

	return;

#	opendir (DIR, $Dir) || die "Error: $Dir - $!\n";
#	@FileList = egrep (!/^\./, readdir (DIR));
#	foreach $File (@FileList) {
#			print "GREP $Dir/$File\n";
#			`date`;
#			`egrep "^Path: $PathName" $Dir/$File >> $PathLogFile`;
#			`date`;
#		}
#	}
#	closedir (DIR);
}


syntax highlighted by Code2HTML, v. 0.9.1