# BOGUS-URL-PATTERNS.RC
#
# Patterns to catch badly formatted URLs in the message bodies of spam.
#
# Conventions used by every recipe in this file:
#
#   * Scoring: "-1000^0" seeds the recipe's score at -1000, and each
#     "1100^0" / "1100^1" condition adds 1100 when its pattern matches.
#     The recipe fires only when the final score is positive, i.e. when
#     at least one weighted pattern matched.
#
#   * Encoded characters: =3D and =2E are the quoted-printable forms of
#     "=" and "."; %2E and %20 are the URL-encoded forms of "." and " ".
#     The patterns accept these so that encoded bodies still match.
#
#   * NOTE(review): the "ÿ" alternative that appears next to "\." in the
#     dot groups is kept exactly as found.  It is presumably either a
#     look-alike byte seen in spam or an encoding artifact -- confirm
#     against the upstream copy of this file.
#
#   * On a match each recipe sets LT3 and LT2 to "yes", stores a log
#     message in SBLOG, and includes ${SBDIR}/functions/loglevel.rc.
#     LEANTAG, TESTNAME and SBDIR are defined outside this file.
#

# Decimal URL
#
# Catches URLs whose host is a bare run of eight or more digits, e.g.
# http://99999999/ (with an optional :port).  The scheme letters are
# individually optional, so a truncated "ttp://" also matches.
# Runs only when LEANTAG is "no".
#
:0 B
* LEANTAG ?? no
* -1000^0
* 1100^1 (^|[^0-9a-z])(=3D)?h?t?t?p?://[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]+(:[0-9][0-9][0-9]?[0-9]?[0-9]?)?/
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (Decimal URL)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# Numeric URL with non-standard port
#
# Catches dotted-quad URLs that carry an explicit port, e.g.
# http://208.134.56.32:1234.  The port group (two to five digits) is
# mandatory; the pattern does not check which port number is used.
# Runs only when LEANTAG is "no".
#
:0 B
* LEANTAG ?? no
* -1000^0
* 1100^1 (^|[^0-9a-z])(=3D)?h?t?t?p?://[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(ÿ|\.|=2E)[0-9][0-9]?[0-9]?(:[0-9][0-9][0-9]?[0-9]?[0-9]?)([^0-9a-z:.]|$)
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (Numeric URL with non-standard port)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# URLs with Bogus Queries
#
# Catches URLs of the form http://example.com?zzzz, where the query
# string follows the host name directly.  A host name should be followed
# by nothing (end of URL) or by a forward slash, never by a question
# mark.  Bodies that mention trueswitch.com are exempt.
# Runs only when LEANTAG is "no".
#
:0 B
* LEANTAG ?? no
* ! (^|[^-_0-9a-z]|=2E)trueswitch(ÿ|\.|=2E)com([^a-z0-9.]|\. |\.$|$)
* -1000^0
* 1100^1 (^|[^0-9a-z])(=3D)?https?://([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?\?[0-9a-z]+
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (URL with bogus query)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# Spam Haven Domain Patterns
#
# Pattern match to catch new domains of spammers who use DNS poisoning
# and other similar tricks to evade other filtering.  The body pattern
# matches host names whose registered label is two letters followed by
# "mort", under .com, .info, .net, .org or .us.
#
# NOTE(review): this recipe has no B flag and the two "forwarded
# message" exclusions carry no "B ??" prefix, so procmail tests them
# against the header rather than the body.  Confirm that this is
# intended.
#
:0
* !--.*forwarded message --
* !^forwarded message:
* -1000^0
* B ?? 1100^0 (^|[^0-9a-z]|=3D)([0-9a-z][-_0-9a-z]*(ÿ|\.|[=%]2E))*[a-z][a-z]mort(ÿ|\.|[=%]2E)(com|info|net|org|us)([^a-z0-9.]|\. |\.$|$)
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (Spam Haven Domain Pattern)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# Geocities URL with a query
#
# Catches geocities.com and geocities.yahoo.com (optionally with a
# two-letter country suffix) URLs in which one or more path segments are
# followed directly by "?".  The optional country suffix must be
# introduced by one of the accepted dot forms (ÿ, ".", =2E, %2E), the
# same alternatives used for every other dot in these patterns.
#
# The "B ??" prefixes are redundant with the B flag on the recipe line;
# they are kept for symmetry with the other recipes.
#
:0 B
* -1000^0
* B ?? 1100^0 (^|[^-_0-9a-z]|[=%]20)http://([a-z]+(ÿ|\.|[=%]2E))*geocities(ÿ|\.|[=%]2E)com/([0-9a-z][-_0-9a-z]+/)+\?
* B ?? 1100^0 (^|[^-_0-9a-z]|[=%]20)http://([a-z]+(ÿ|\.|[=%]2E))*geocities(ÿ|\.|[=%]2E)yahoo(ÿ|\.|[=%]2E)com((ÿ|\.|[=%]2E)[a-z][a-z])?/([0-9a-z][-_0-9a-z]+/)+\?
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (Geocities URL with query)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# spaces.msn.com URL with redirector
#
# Catches spaces.msn.com member URLs whose path continues with "ib=",
# i.e. http://spaces.msn.com/members/<name>/ib=...
#
:0 B
* -1000^0
* B ?? 1100^0 (^|[^-_0-9a-z]|[=%]20)http://([a-z]+(ÿ|\.|[=%]2E))*spaces(ÿ|\.|[=%]2E)msn(ÿ|\.|[=%]2E)com/members/[0-9a-z]+/ib=
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (spaces.msn.com URL with redirector)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# Tripod URL with a query
#
# Catches tripod.com URLs in which one or more path segments are
# followed directly by "?".
#
:0 B
* -1000^0
* B ?? 1100^0 (^|[^-_0-9a-z]|[=%]20)http://([a-z]+(ÿ|\.|[=%]2E))*tripod(ÿ|\.|[=%]2E)com/([0-9a-z][-_0-9a-z]+/)+\?
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (Tripod URL with query)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# URL with illegal characters
#
# Catches http/https URLs (with "//" or any slash/backslash mix after
# the colon) whose host part contains "<" or ">" immediately followed by
# a dot form or an alphanumeric character.
#
# The pattern is split over three lines with backslash-newline
# continuations; procmail ignores the leading whitespace on continued
# regexp lines, so the indentation is not part of the pattern.
#
:0 B
* -1000^0
* B ?? 1100^0 (^|[^-_0-9a-z]|[=%]20)https?:(//|/\\|\\/|\\\\)\
              ([0-9a-z][-_0-9a-z]+(ÿ|\.|[=%]2E))*\
              [<>]((ÿ|\.|[=%]2E)|[0-9a-z])
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (URL with illegal character)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# URL with invalid protocol statement
#
# Catches http/https URLs in which the colon is followed by something
# other than "//": "%3a", "/\", "\/" or "\\", then an alphanumeric
# character.  Runs only when LT2 is "no".
#
# NOTE(review): this recipe is gated on LT2 where the earlier recipes
# use LEANTAG, and "%3a" is matched after a literal ":" rather than in
# place of it -- confirm that both are intended.
#
:0 B
* LT2 ?? no
* -1000^0
* B ?? 1100^0 (^|[^-_0-9a-z]|[=%]20)https?:(%3a|/\\|\\/|\\\\)[0-9a-z]
{
    LT3=yes
    LT2=yes
    SBLOG="C3R-Pattern Match (${TESTNAME}) (Invalid Protocol Statement)"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
}