# awk program to check newsgroup names for validity, rigorously
# echo groupname | awk -f namecheck.awk
# It may look like some things in per-component checking could be done
# more efficiently by moving them into whole-name checking. The presence
# of encoded name components messes this up.
NR == 1 {
	# Checks on the name as a whole.  The first test that trips chooses
	# the complaint; an empty complaint means nothing was found wrong.
	why = ""
	if (NF == 0)
		why = "empty name"
	else if (NF > 1)
		why = "white space in name"
	else if ($1 ~ /^\./ || $1 ~ /\.$/ || $1 ~ /\.\./)
		why = "bad dot(s) in name"
	else if ($1 !~ /^[a-zA-Z]/)	# uppercase is rejected per component below
		why = "name does not begin with a letter"
	else if ($1 ~ /^(junk|control)\./)
		why = "name starts with control or junk"
	if (why != "") {
		print why
		exit 1
	}

	# Checks on each dot-separated component, taken left to right.
	# Processing stops at the first component that fails.
	ncomp = split($1, part, ".")
	for (k = 1; k <= ncomp; k++) {
		comp = part[k]
		why = ""
		# The tests on components beginning with "=" are kept as flat
		# two-part conditions on purpose: they look as if they could be
		# simplified to one-part tests, but some broken awks then fail.
		if (comp ~ /^=/ && comp !~ /^=\?[^=?]+\?[^=?]+\?[^?]+\?=$/)
			why = "name component resembles encoded word but isn't one"
		else if (comp ~ /^=/ && comp !~ /^=[a-zA-Z0-9+_=?-]*=$/)
			why = "bad character in encoded name component"
		else if (comp ~ /^=/ && comp !~ /^=\?[^?]+\?b\?/)
			why = "encoded name component does not use b encoding"
		else if (comp ~ /^=/)
			why = ""	# looks like an okay encoded word
		else if (comp ~ /^[0-9]*$/)
			why = "all-numeric name component"
		else if (comp !~ /^[a-zA-Z0-9]/)	# uppercase rejected further down
			why = "name component starts with non-alphanumeric"
		else if (comp !~ /[a-zA-Z]/)	# uppercase rejected further down
			why = "name component does not contain letter"
		else if (comp == "all" || comp == "ctl")
			why = "`all' or `ctl' used as name component"
		else if (length(comp) > 24)
			why = "name component longer than 24 characters"
		else if (comp ~ /[A-Z]/)
			why = "uppercase letter(s) in name"
		else if (comp ~ /[^a-z0-9+_-]/)
			why = "illegal character(s) in name"
		else if (comp ~ /--|__|\+\+./)	# sigh, c++ etc must be allowed
			why = "repeated punctuation in name"
		else if (comp == part[k+1])	# same as the component that follows
			why = "repeated component(s) in name"
		if (why != "") {
			print why
			exit 1
		}
	}
}
# A second input record can only mean the name contained a newline.
NR > 1 {
	print "newline(s) in name"
	exit 1
}
# End of input.  The END action still runs after an `exit 1' in one of
# the main rules, and an `exit 0' here would replace that status, so a
# rejected name would be reported with a zero exit status.  Falling off
# the end of this action instead leaves the earlier status in place
# (and yields zero when no rule complained).
END {
	if (NR == 0) {
		# no input record at all, so there was no name to check
		print "empty name"
		exit 1
	}
}
# syntax highlighted by Code2HTML, v. 0.9.1