#! /bin/sh
# newsdaily - daily housekeeping chores
# =()<. ${NEWSCONFIG-@@}>()=
. ${NEWSCONFIG-/usr/local/libexec/cnews/config}
PATH=$NEWSCTL/bin:$NEWSBIN/maint:$NEWSBIN:$NEWSPATH ; export PATH
umask $NEWSUMASK

gripes="/tmp/ngrip$$"
tmp="/tmp/ndai$$"
sus="/tmp/suspects$$"
trap "rm -f $gripes $tmp $sus ; trap 0 ; exit 0" 0 1 2 15
>$gripes
cd $NEWSCTL

# keep one generation of log -- it's big
rm -f log.o log.o.Z
mv log log.o
touch log
# log.o gets compressed below

# keep several generations of other logs for statistics and troubleshooting
logroll -o errlog
logroll -o batchlog

# report any errors
sleep 500	# hope that errlog.o is quiescent after this
if test -s errlog.o
then
	(
	echo "errlog.o"
	echo ---------
	cat errlog.o
	echo ---------
	echo
	) >>$gripes
fi

# look for input anomalies
cd $NEWSARTS/in.coming
find . -type f -mtime +1 -print | sed 's;^\./;;' | egrep -v '^bad/' >$tmp
if test -s $tmp	# old non-bad files lying about
then
	(
	echo 'old input files:'
	ls -ldtr `cat $tmp`
	echo
	) >>$gripes
fi
find bad -type f -name '[0-9]*' -mtime -2 -print >$tmp	# recent bad batches
if test -s $tmp
then
	(
	echo 'recent bad input batches (perhaps worth investigation):'
	ls -ldtr `cat $tmp`
	echo
	) >>$gripes
fi
find bad -type f -name '[0-9]*' -mtime +7 -exec rm -f '{}' ';'
cd $NEWSCTL

# look for output anomalies
cd $NEWSARTS/out.going
find . -type f -name 'togo*' -size +0 -mtime +1 -print >$tmp
if test -s $tmp
then
	(
	echo 'batching possibly stalled for sites:'
	sed 's;^\./\([^/]*\)/.*;\1;' $tmp | sort -u
	echo
	) >>$gripes
fi
cd $NEWSCTL
no=0
if test -f batchlog.o
then
	no=`egrep 'disk too full' batchlog.o | wc -l`
fi
if test " $no" -gt 0
then
	(
	echo "space shortage(s) limited batching $no times"
	echo
	) >>$gripes
fi

# do a quick check of the active file
checkactive -q >$tmp
if test -s $tmp
then
	(
	echo 'checkactive reports problems in active file:'
	cat $tmp
	echo
	) >>$gripes
fi

# sweep log file, once, for suspect lines
egrep '`|ancient|future|unparsable|header|space in|Message-ID' log.o >$sus

# compress the old log file, now that we're done with it
compress log.o

# look for problem newsgroups on input (can miss cross-posted articles)
egrep 'junked|junk group is excluded' $sus |
	sed 's/.*`\(.*\)'"'"'.*/\1/' | sort | uniq -c | sort -nr | sed 5q >$tmp
if test -s $tmp
then
	(
	echo 'leading five unknown newsgroups by number of articles:'
	cat $tmp
	echo
	) >>$gripes
fi
egrep unapproved $sus |
	sed 's/.*`\(.*\)'"'"'.*/\1/' | sort | uniq -c | sort -nr | sed 5q >$tmp
if test -s $tmp
then
	(
	echo 'top five supposedly-moderated groups with unmoderated postings:'
	cat $tmp
	echo
	) >>$gripes
fi
egrep 'no subscribed' $sus |
	sed 's/.*`\(.*\)'"'"'.*/\1/' | sort | uniq -c | sort -nr | sed 5q >$tmp
if test -s $tmp
then
	(
	echo 'leading five unsubscribed newsgroups:'
	cat $tmp
	echo
	) >>$gripes
fi

# And other signs of problems.
egrep 'ancient|too far in the future|unparsable Date' $sus | egrep ' - ' |
	awk '{print $4}' | sort | uniq -c | sort -nr | sed 5q >$tmp
if test -s $tmp
then
	(
	echo 'leading five sites sending stale/future/misdated news:'
	cat $tmp
	echo
	) >>$gripes
fi
egrep ' (no|empty) .* header|contains non-|Message-ID|space in' $sus |
	egrep ' - ' | awk '{print $4}' | sort | uniq -c | sort -nr | sed 5q >$tmp
if test -s $tmp
then
	(
	echo 'leading five sites sending news with bad headers:'
	cat $tmp
	echo
	) >>$gripes
fi

# send it
report 'newsdaily report' <$gripes

# and do other daily chores
$NEWSBIN/relay/dodelayed

# if there are dead-newsgroup directories to be deleted when empty, try them
if test -s dirs.tbd
then
	while read dir
	do
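		# each line of dirs.tbd names a dead newsgroup whose directory is
		# to be removed once empty; groups still holding articles are
		# echoed to dirs.ttd (stdout) so they get retried tomorrow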
		if test ! -d $NEWSARTS/$dir
		then
			continue	# NOTE CONTINUE
		fi
		cd $NEWSARTS/$dir
		if test " `ls | egrep '^[0-9]+$' | wc -l`" -gt 0
		then
			# articles remain, try again tomorrow...
			echo "$dir"
		else
			# looks like we can scrap it...
			rm -f .[a-zA-Z]* *.* $NEWSOV/$dir/.overview
			fulldir="$dir"	# the loops below whittle $dir down
			cd $NEWSARTS
			rmdir $dir 2>/dev/null
			while test `expr "$dir" : '.*/.*'` -gt 0
			do
				dir=`expr "$dir" : '\(.*\)/[^/]*'`
				rmdir $dir 2>/dev/null
			done
			cd $NEWSOV
			dir="$fulldir"	# start from the full path again for the overview tree
			rmdir $dir 2>/dev/null
			while test `expr "$dir" : '.*/.*'` -gt 0
			do
				dir=`expr "$dir" : '\(.*\)/[^/]*'`
				rmdir $dir 2>/dev/null
			done
		fi
	done <dirs.tbd >dirs.ttd
	cd $NEWSCTL
	mv dirs.ttd dirs.tbd
fi

# if symlinks.used is older than the oldest article, get rid of it
if test -f symlinks.used
then
	usedat="`cat symlinks.used`"
	case "$usedat" in
	'')	usedat=0	;;
	esac
	oldest="`sed 1q history | sed 's/^[^ ]* //' | sed 's/[~ ].*//'`"
	case "$oldest" in
	'')	oldest=`now`	;;
	esac
	# throw in 10000s for a safety margin
	obsoleteness=`echo "$usedat $oldest" | awk '{ print $1 - $2 + 10000 }'`
	case "$obsoleteness" in
	-*)	rm -f symlinks.used	;;
	esac
fi