#! /bin/sh
# Gateways articles from mailing lists to netnews.  Feed the mail into
# this, and send the output into relaynews or newsspool for C News,
# presumably rnews for B news.
#
# Usage: this-script newsgroup
#	stdin:  one mail message (optionally starting with a "From " line)
#	stdout: the same message with its headers rewritten for news
#
# NB! Later versions of this file will be in C News.
#
# We ensure that RFC1036 headers exist else we add them.
#	(Path, Message-ID, From, Date, Newsgroups, Subject)
# Mask To, Cc, Path, Approved, Sender, Newsgroups, Resent-* with
#	Original- to avoid potential infinite loops.  Perhaps we should
#	delete them?
# Throw away Received, Bcc, Fcc, In-Reply-To, Illegal-*, X-Mailer
# Add our own Path, Newsgroups, Sender, Approved
# Correct headers of the form Text:\n by adding space after :
# Rewrite From: into "address (comments)" instead of "phrase <address>"
#	and qualify it.  (!! should do a better job)
#
# Mark Moraes, University of Toronto

### Configuration variables
host=cs.toronto.edu	# this gets put in Path:, and will therefore
			# not receive the article.  Make sure it's
			# not the name of a news host.
fakeuser=nevdull	# user for Path: and From: if all else fails.
			# Should preferably be an alias for /dev/null
admin=list-admin	# for the Sender: line.  Should point at the
			# person that runs the gateway.
debugdir=/tmp/m		# if this directory exists and is writable, copies
			# of the input and output are kept in it as
			# save<pid> and out<pid>.
### End of configuration.  You shouldn't need to change stuff after this.

case $# in
1)	newsgroups=$1; shift ;;
*)	echo "Usage: $0 newsgroup" >&2; exit 1 ;;
esac

SHELL=/bin/sh; export SHELL	# just in case system() runs $SHELL, not sh.

path=$host!$fakeuser
sender=$admin@$host
approved=$newsgroups@mail.$host

# Prefer nawk (as the original did); fall back to awk where nawk is not
# installed.  The program needs new-awk features (functions, sub, match).
if command -v nawk >/dev/null 2>&1; then
	awk=nawk
else
	awk=awk
fi

# keepcopy prefix
# Copies stdin to stdout.  When $debugdir is usable, also saves the stream
# in $debugdir/<prefix><pid>.  When it is not, this is a plain pass-through,
# so a missing debug directory can no longer make the pipeline (and hence
# the mail delivery) report failure.
# NOTE(review): the file names are predictable; $debugdir should not be
# writable by untrusted users.
keepcopy() {
	if [ -d "$debugdir" ] && [ -w "$debugdir" ]; then
		tee "$debugdir/$1$$"
	else
		cat
	fi
}

# NOTE: the awk program below is a single-quoted shell string, so it must
# not contain any apostrophes, not even in comments.
keepcopy save |
"$awk" -v host="$host" -v path="$path" -v newsgroups="$newsgroups" \
	-v sender="$sender" -v approved="$approved" '
function extract(s, startchar, endchar,    tmp1, tmp2, tmp) {
	# s, startchar, endchar are arguments, rest are locals.
	# Returns whatever is in s between the first startchar and the
	# next endchar (not inclusive).  If there is no endchar, returns
	# everything after startchar; if there is no startchar, returns "".
	tmp1 = index(s, startchar);
	if (tmp1 != 0) {
		tmp = substr(s, tmp1 + 1, length(s) - tmp1);
		tmp2 = index(tmp, endchar);
		if (tmp2 != 0)
			tmp = substr(tmp, 1, tmp2 - 1);
	} else
		tmp = "";
	return tmp;
}

function strip(s,    tmp) {
	# s is an argument, rest are locals.
	# Strips off leading and trailing whitespace (spaces and tabs).
	tmp = s;
	if (match(tmp, /^[ \t]+/) != 0)
		tmp = substr(tmp, RLENGTH + 1, length(tmp) - RLENGTH);
	if (match(tmp, /[ \t]+$/) != 0)
		tmp = substr(tmp, 1, RSTART - 1);
	return tmp;
}

function upto(s, endchar,    tmp1, tmp) {
	# s, endchar are arguments, rest are locals.
	# Returns the part of s before the first endchar, not including
	# endchar, or "" if endchar does not occur.  Whitespace is NOT
	# stripped here; callers use strip() for that.
	tmp1 = index(s, endchar);
	if (tmp1 == 0)
		tmp = "";
	else
		tmp = substr(s, 1, tmp1 - 1);
	return tmp;
}

function addrmunge(s,    addr, phrase) {
	# s is an argument, rest are locals.
	# Tries to rewrite an address from the RFC822 "phrase <address>"
	# form to RFC1036 "address (phrase)" form.  If address has no @
	# or !, assume it is local and add @host to it.  Yuck!
	# We do not pretend to handle pathological (in the psychiatric
	# sense) cases with imbalanced or nested parenthesis, angle
	# brackets or quoting.  Why doesnt awk sub() have \(\)?
	if (s ~ /.*[ \t]*<.*>/) {
		addr = extract(s, "<", ">");
		phrase = upto(s, "<");
	} else if (s ~ /.*\(.*\)/) {
		phrase = extract(s, "(", ")");
		addr = upto(s, "(");
	} else {
		addr = s;
		phrase = "";
	}
	addr = strip(addr);
	phrase = strip(phrase);
	if (addr !~ /.*@.*/ && addr !~ /.*!.*/)
		addr = addr "@" host;
	if (phrase ~ /[^ \t]+/)
		addr = sprintf("%s (%s)", addr, phrase)
	return addr;
}

BEGIN {
	inbody = "";	# becomes "y" once the blank line ending the headers is seen
	ignore = "";	# "y" while continuation lines of a dropped header are skipped
	print "Path: " path;
	print "Newsgroups: " newsgroups;
}

# Body lines are copied through untouched.
inbody == "y" {
	print;
	next;
}

# First blank line: end of headers.  Supply any required headers that
# were missing, add our own Sender and Approved, then emit the blank line.
inbody == "" && $0 ~ /^$/ {
	inbody = "y";
	if (saw["from"] == "") {
		# No From: header; fall back to Reply-To:, then the
		# envelope "From " line, then Return-Path:, then the admin.
		if (replyto != "")
			addr = replyto;
		else if (from != "")
			addr = from;
		else if (rpath != "")
			addr = rpath;
		else
			addr = sender;
		print "From:", addrmunge(addr);
	}
	if (saw["date"] == "") {
		# stolen from C News anne.jones
		# LC_ALL=C keeps the field order and names of date(1)
		# predictable whatever locale the gateway runs in.
		gendate = "set `LC_ALL=C date`; echo $1, $3 $2 $6 $4 $5"
		printf "Date: ";
		system(gendate);
	}
	if (saw["message-id"] == "") {
		# stolen from C News anne.jones
		# Plain < and > are safe inside the shell double quotes;
		# a backslash before them is not a defined awk escape.
		genmsgid = "set `LC_ALL=C date`; echo \"<$6$2$3.`echo $4 | tr -d :`.$$@" host ">\"";
		printf "Message-ID: ";
		system(genmsgid);
	}
	if (saw["subject"] == "")
		print "Subject: (none)";
	print "Sender: " sender
	print "Approved: " approved
	print;
	next;
}

# Continuation lines follow the fate of the header they belong to.
ignore == "y" && $0 ~ /^[ \t]/ {
	next;
}

/^[ \t]/ {
	print;
	next;
}

# A header line.
/^[^ \t]*:/ {
	s = $0;
	ignore = "";
	if (s !~ /^[^ \t]*: /)	# C news insists on a space after :
		sub(":", ": ", s);

	# Headers that are thrown away, together with their continuations.
	if (s ~ /^[xX]-[mM][aA][iI][lL][eE][rR]:/ ||
	    s ~ /^[iI][lL][lL][eE][gG][aA][lL]-.*:/ ||
	    s ~ /^[rR][eE][cC][eE][iI][vV][eE][dD]:/ ||
	    s ~ /^[bB][cC][cC]:/ || s ~ /^[fF][cC][cC]:/ ||
	    s ~ /^[iI][nN]-[rR][eE][pP][lL][yY]-[tT][oO]:/) {
		ignore = "y";
		next;
	}

	# Headers that are masked with Original-.  The To: and Cc: patterns
	# are anchored so that they match only those headers, not any line
	# that merely contains "to:" or "cc:" (Reply-To:, a Subject: with
	# "to:" in its text, ...).
	if (s ~ /^[pP][aA][tT][hH]:/ ||
	    s ~ /^[aA][pP][pP][rR][oO][vV][eE][dD]:/ ||
	    s ~ /^[sS][eE][nN][dD][eE][rR]:/ ||
	    s ~ /^[nN][eE][wW][sS][gG][rR][oO][uU][pP][sS]:/ ||
	    s ~ /^[tT][oO]:/ || s ~ /^[cC][cC]:/ ||
	    s ~ /^[rR][eE][sS][eE][nN][tT]-.*:/) {
		printf("Original-%s\n", s);
		next;
	}

	if (s ~ /^[fF][rR][oO][mM]:/) {
		saw["from"] = s;
		sub(/^[fF][rR][oO][mM]:/, "", s);
		print "From:", addrmunge(s);
		next;
	}

	# The remaining headers are noted and/or have the spelling of
	# their name normalized, then fall through to be printed.
	if (s ~ /^[dD][aA][tT][eE]:/) {
		saw["date"] = s;
		sub(/^[dD][aA][tT][eE]:/, "Date:", s);
		# s = checkdate(s);
	}
	if (s ~ /^[mM][eE][sS][sS][aA][gG][eE]-[iI][dD]:/) {
		saw["message-id"] = s;
		sub(/^[mM][eE][sS][sS][aA][gG][eE]-[iI][dD]:/, "Message-ID:", s);
		# s = checkmsgid(s);
	}
	if (s ~ /^[rR][eE][pP][lL][yY]-[tT][oO]:/) {
		replyto = s;
		sub(/^[rR][eE][pP][lL][yY]-[tT][oO]:/, "", replyto);
	}
	if (s ~ /^[rR][eE][tT][uU][rR][nN]-[pP][aA][tT][hH]:/) {
		rpath = s;
		sub(/^[rR][eE][tT][uU][rR][nN]-[pP][aA][tT][hH]:/, "", rpath);
	}
	if (s ~ /^[sS][uU][bB][jJ][eE][cC][tT]:/) {
		saw["subject"] = s;
		sub(/^[sS][uU][bB][jJ][eE][cC][tT]:/, "Subject:", s);
	}
	print s;
	next;
}

# Envelope line; remember the sender in case there is no From: header.
/^From / {
	from = $2;
	next;
}

# Anything else found before the blank line.
# NOTE(review): this goes to stdout, i.e. into the article header; the
# commented-out pipe would send it to stderr instead.
{
	printf("non header line in header: %s\n", $0)	# | "cat >&2"
}

END {
	if (inbody == "")	# may as well stick a blank line on
		print "";
}' |
keepcopy out