#
# Obfuscated ASCII spam, mostly geocities addresses
# Bayes 99 / Sexmail and geocities URLs are a sign of spam
#
#
# Positive indicators (signs of ASCII art).
# Each rule matches two letters separated by a run of spaces;
# __GAP_<n>_CHAR looks for a gap of exactly n+3 spaces.
#
rawbody __GAP_2_CHAR      /[a-z][ ]{5}[a-z]/i
rawbody __GAP_3_CHAR      /[a-z][ ]{6}[a-z]/i
rawbody __GAP_4_CHAR      /[a-z][ ]{7}[a-z]/i
rawbody __GAP_5_CHAR      /[a-z][ ]{8}[a-z]/i
rawbody __GAP_6_CHAR      /[a-z][ ]{9}[a-z]/i
rawbody __GAP_7_CHAR      /[a-z][ ]{10}[a-z]/i
rawbody __GAP_8_CHAR      /[a-z][ ]{11}[a-z]/i
rawbody __GAP_9_CHAR      /[a-z][ ]{12}[a-z]/i
rawbody __GAP_10_CHAR     /[a-z][ ]{13}[a-z]/i
#
# Negative indicators, should not show up in ASCII art.
# "Left" variant: a non-letter, then 1-3 digits/punctuation characters,
# then the gap (n+3 spaces for __GAP_<n>_ALPH_L), then 2-3 alphanumerics.
#
rawbody __GAP_1_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{4}[a-z0-9]{2,3}/i
rawbody __GAP_2_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{5}[a-z0-9]{2,3}/i
rawbody __GAP_3_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{6}[a-z0-9]{2,3}/i
rawbody __GAP_4_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{7}[a-z0-9]{2,3}/i
rawbody __GAP_5_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{8}[a-z0-9]{2,3}/i
rawbody __GAP_6_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{9}[a-z0-9]{2,3}/i
rawbody __GAP_7_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{10}[a-z0-9]{2,3}/i
rawbody __GAP_8_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{11}[a-z0-9]{2,3}/i
rawbody __GAP_9_ALPH_L    /[^a-z][0-9\.,:-]{1,3}[ ]{12}[a-z0-9]{2,3}/i
rawbody __GAP_10_ALPH_L   /[^a-z][0-9\.,:-]{1,3}[ ]{13}[a-z0-9]{2,3}/i
# True once at least five of the ten "left" gap widths were seen.
meta __GAP_ALPH_L (__GAP_1_ALPH_L + __GAP_2_ALPH_L + __GAP_3_ALPH_L + __GAP_4_ALPH_L + __GAP_5_ALPH_L + __GAP_6_ALPH_L + __GAP_7_ALPH_L + __GAP_8_ALPH_L + __GAP_9_ALPH_L + __GAP_10_ALPH_L >= 5)
#
# Negative indicators, should not show up in ASCII art.
#
# "Right" variant: 2-3 alphanumerics, then the gap (n+3 spaces for
# __GAP_<n>_ALPH_R), then 1-3 digits/punctuation characters and a non-letter.
#
rawbody __GAP_1_ALPH_R    /[0-9a-z]{2,3}[ ]{4}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_2_ALPH_R    /[0-9a-z]{2,3}[ ]{5}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_3_ALPH_R    /[0-9a-z]{2,3}[ ]{6}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_4_ALPH_R    /[0-9a-z]{2,3}[ ]{7}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_5_ALPH_R    /[0-9a-z]{2,3}[ ]{8}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_6_ALPH_R    /[0-9a-z]{2,3}[ ]{9}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_7_ALPH_R    /[0-9a-z]{2,3}[ ]{10}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_8_ALPH_R    /[0-9a-z]{2,3}[ ]{11}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_9_ALPH_R    /[0-9a-z]{2,3}[ ]{12}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_10_ALPH_R   /[0-9a-z]{2,3}[ ]{13}[0-9\.,:-]{1,3}[^a-z]/i
# True once at least five of the ten "right" gap widths were seen.
meta __GAP_ALPH_R (__GAP_1_ALPH_R + __GAP_2_ALPH_R + __GAP_3_ALPH_R + __GAP_4_ALPH_R + __GAP_5_ALPH_R + __GAP_6_ALPH_R + __GAP_7_ALPH_R + __GAP_8_ALPH_R + __GAP_9_ALPH_R + __GAP_10_ALPH_R >= 5)
#
# Negative indicators, should not show up in ASCII art.
# "Both" variant: a non-letter and 1-3 digits, then the gap (n+3 spaces for
# __GAP_<n>_ALPH_B), then 1-3 digits/punctuation characters and a non-letter.
#
rawbody __GAP_1_ALPH_B    /[^a-z][0-9]{1,3}[ ]{4}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_2_ALPH_B    /[^a-z][0-9]{1,3}[ ]{5}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_3_ALPH_B    /[^a-z][0-9]{1,3}[ ]{6}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_4_ALPH_B    /[^a-z][0-9]{1,3}[ ]{7}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_5_ALPH_B    /[^a-z][0-9]{1,3}[ ]{8}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_6_ALPH_B    /[^a-z][0-9]{1,3}[ ]{9}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_7_ALPH_B    /[^a-z][0-9]{1,3}[ ]{10}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_8_ALPH_B    /[^a-z][0-9]{1,3}[ ]{11}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_9_ALPH_B    /[^a-z][0-9]{1,3}[ ]{12}[0-9\.,:-]{1,3}[^a-z]/i
rawbody __GAP_10_ALPH_B   /[^a-z][0-9]{1,3}[ ]{13}[0-9\.,:-]{1,3}[^a-z]/i
# True once at least five of the ten "both" gap widths were seen.
meta __GAP_ALPH_B (__GAP_1_ALPH_B + __GAP_2_ALPH_B + __GAP_3_ALPH_B + __GAP_4_ALPH_B + __GAP_5_ALPH_B + __GAP_6_ALPH_B + __GAP_7_ALPH_B + __GAP_8_ALPH_B + __GAP_9_ALPH_B + __GAP_10_ALPH_B >= 5)
#
# Negative indicators, should not show up in ASCII art.
#
# Upper-case variant (case-sensitive): 2-3 capital letters on each side of
# the gap; __GAP_<n>_CHARB looks for a gap of exactly n+3 spaces.
#
rawbody __GAP_2_CHARB     /[A-Z]{2,3}[ ]{5}[A-Z]{2,3}/
rawbody __GAP_3_CHARB     /[A-Z]{2,3}[ ]{6}[A-Z]{2,3}/
rawbody __GAP_4_CHARB     /[A-Z]{2,3}[ ]{7}[A-Z]{2,3}/
rawbody __GAP_5_CHARB     /[A-Z]{2,3}[ ]{8}[A-Z]{2,3}/
rawbody __GAP_6_CHARB     /[A-Z]{2,3}[ ]{9}[A-Z]{2,3}/
rawbody __GAP_7_CHARB     /[A-Z]{2,3}[ ]{10}[A-Z]{2,3}/
rawbody __GAP_8_CHARB     /[A-Z]{2,3}[ ]{11}[A-Z]{2,3}/
rawbody __GAP_9_CHARB     /[A-Z]{2,3}[ ]{12}[A-Z]{2,3}/
rawbody __GAP_10_CHARB    /[A-Z]{2,3}[ ]{13}[A-Z]{2,3}/
# True once at least five of the nine upper-case gap widths were seen.
meta __GAP_ALPH_D (__GAP_2_CHARB + __GAP_3_CHARB + __GAP_4_CHARB + __GAP_5_CHARB + __GAP_6_CHARB + __GAP_7_CHARB + __GAP_8_CHARB + __GAP_9_CHARB + __GAP_10_CHARB >= 5)
#
# If there are too many ALPH_L, ALPH_R and ALPH_B hits combined (ten or more
# of the thirty sub-rules), ASCII art becomes a possibility again.
#
meta __GAP_ALPH_C (__GAP_1_ALPH_L + __GAP_2_ALPH_L + __GAP_3_ALPH_L + __GAP_4_ALPH_L + __GAP_5_ALPH_L + __GAP_6_ALPH_L + __GAP_7_ALPH_L + __GAP_8_ALPH_L + __GAP_9_ALPH_L + __GAP_10_ALPH_L + __GAP_1_ALPH_R + __GAP_2_ALPH_R + __GAP_3_ALPH_R + __GAP_4_ALPH_R + __GAP_5_ALPH_R + __GAP_6_ALPH_R + __GAP_7_ALPH_R + __GAP_8_ALPH_R + __GAP_9_ALPH_R + __GAP_10_ALPH_R + __GAP_1_ALPH_B + __GAP_2_ALPH_B + __GAP_3_ALPH_B + __GAP_4_ALPH_B + __GAP_5_ALPH_B + __GAP_6_ALPH_B + __GAP_7_ALPH_B + __GAP_8_ALPH_B + __GAP_9_ALPH_B + __GAP_10_ALPH_B >= 10)
#
# Unlikely combination of consonants: a space-delimited two-letter token
# built only from the listed consonants.
#
rawbody __GAP_2_WORD      / [cfgklmnpqrstwxz]{2} /i
#
# Try to catch whole space-delimited words that mix digits and
# alphanumeric characters.
#
rawbody __GAP_4_WORDA     / [a-z,\.;]{2}[0-9][a-z] /i
rawbody __GAP_4_WORDB     / [a-z,\.;][0-9][a-z]{2} /i
rawbody __GAP_4_WORDC     / [a-z,\.;][0-9]{2}[a-z] /i
rawbody __GAP_4_WORDD     / [a-z,\.;][0-9]{3} /i
# Starts with a digit but is not a plain four-digit number.
rawbody __GAP_4_WORDE     /(?! [0-9]{4} ) [0-9][a-z0-9;]{3} /i
meta __GAP_4_WORD (__GAP_4_WORDA + __GAP_4_WORDB + __GAP_4_WORDC + __GAP_4_WORDD + __GAP_4_WORDE >= 2)
# NOTE(review): __GAP_5_WORDB/C/D use the same four-character patterns as
# __GAP_4_WORDC/B/D; possibly a copy/paste leftover - confirm the intent.
rawbody __GAP_5_WORDA     / [a-z,\.;][0-9][a-z]{3} /i
rawbody __GAP_5_WORDB     / [a-z,\.;][0-9]{2}[a-z] /i
rawbody __GAP_5_WORDC     / [a-z,\.;][0-9][a-z]{2} /i
rawbody __GAP_5_WORDD     / [a-z,\.;][0-9]{3} /i
# Starts with a digit but is not a plain five-digit number.
rawbody __GAP_5_WORDE     /(?! [0-9]{5} ) [0-9][a-z0-9;]{4} /i
meta __GAP_5_WORD (__GAP_5_WORDA + __GAP_5_WORDB + __GAP_5_WORDC + __GAP_5_WORDD + __GAP_5_WORDE >= 2)
# Six/seven-character tokens: the A rules start with a digit and exclude
# all-digit tokens, the B rules start with a letter and exclude all-letter
# tokens.
rawbody __GAP_6_WORDA     /(?![ ][0-9]{6} ) [0-9][a-z0-9;]{5} /i
rawbody __GAP_6_WORDB     /(?![ ][a-z]{6} ) [a-z][a-z0-9;]{5} /i
rawbody __GAP_7_WORDA     /(?![ ][0-9]{7} ) [0-9][a-z0-9;]{6} /i
rawbody __GAP_7_WORDB     /(?![ ][a-z]{7} ) [a-z][a-z0-9;]{6} /i
#
# Negative, no single word match found.
#
meta __GAP_NOWORD (__GAP_2_WORD + __GAP_4_WORD + __GAP_5_WORD + __GAP_6_WORDA + __GAP_6_WORDB + __GAP_7_WORDA + __GAP_7_WORDB < 1)
#
# Catch isolated points, commas and semicolons: the punctuation must have
# four spaces on either side.
# The previous patterns began with "[]", which Perl reads as a bracketed
# class whose first member is a literal "]"; each pattern therefore was one
# big character class repeated four times and hit on runs such as "....".
# The space inside "[ ]{4}" is restored to match the other gap rules.
#
rawbody __GAP_POINT_1     /[ ]{4}[\.\,][ ]{4}/
rawbody __GAP_POINT_2     /[ ]{4};;[ ]{4}/
meta __GAP_POINT (__GAP_POINT_1 || __GAP_POINT_2)
#
# Character combinations which are very unlikely.
# NOTE(review): __MY_OBFUX also contains "uj|vsh", which __MY_OBFU_MISC
# matches as well, so those sequences count twice in the sums below -
# confirm whether that is intended.
#
rawbody __MY_OBFUZ        /z(r|f|k|j|v|x)/i
rawbody __MY_OBFUJ        /j(d|h|x|q|b|z|v|w)/i
rawbody __MY_OBFUY        /y(b|j)/i
rawbody __MY_OBFUX        /x(b|f|d|h|u)|dh|uj|vsh/i
rawbody __MY_OBFUT        /t(q|b|l|j)/i
rawbody __MY_OBFU_MISC    /uj|vsh/i
meta __RANDOM_CHARS_1 (__MY_OBFUZ + __MY_OBFUJ + __MY_OBFUY + __MY_OBFUX + __MY_OBFUT + __MY_OBFU_MISC >= 3)
meta __RANDOM_CHARS_2 (__MY_OBFUZ + __MY_OBFUJ + __MY_OBFUY + __MY_OBFUX + __MY_OBFUT + __MY_OBFU_MISC >= 5)
#
# Combine __RANDOM_CHARS_1/2 and/or __GAP_2_WORD with the negative matchers
# __LISTKEYWORD and __PATHNAME (both sides have to be true).
# NOTE(review): __RANDOM_CHARS_2 uses the same sum as __RANDOM_CHARS_1 with
# a higher threshold, so it looks redundant inside the "||" - confirm.
#
meta __RANDOM_CHARS_3 ((__RANDOM_CHARS_1 || __RANDOM_CHARS_2 || __GAP_2_WORD) + __LISTKEYWORD == 2)
meta __RANDOM_CHARS_4 ((__RANDOM_CHARS_1 || __RANDOM_CHARS_2 || __GAP_2_WORD) + __PATHNAME == 2)
#
# Being listed in a URIBL is a sign of ASCII art spam.
# __URI_RBL_MULTI and __URI_RBL_SINGLE are not defined in this part of the
# file; presumably they are provided elsewhere - verify.
#
meta __HAVESPAMURI (__URI_RBL_MULTI + __URI_RBL_SINGLE >= 1)
#
# Try to catch some false positive cases.
# The dot in "fr\." is escaped so that it only matches the literal
# abbreviation "fr." and no longer any "fr" followed by an arbitrary
# character (e.g. "from", "free").
#
rawbody __LISTKEYWORD /(?:level|host|fr\.|USD|CHF|EUR|euro|file|price|pieces|stück|save|artikel|server|Kbyte|Copyright)/i
rawbody __PATHNAME /(?:\:[\/\\])/
rawbody __HTMLCOMMENT /(?: