Package lamson :: Module spam
[hide private]
[frames] | no frames]

Source Code for Module lamson.spam

  1  """ 
  2  Uses the SpamBayes system to perform filtering and classification 
  3  of email.  It's designed so that you attach a single decorator 
  4  to the state functions you need to be "spam free", and then use the 
  5  lamson.spam.Filter code to do training. 
  6   
  7  SpamBayes comes with extensive command line tools for processing 
  8  maildir and mbox for spam.  A good way to train SpamBayes is to  
  9  take mail that you know is spam and stuff it into a maildir, then 
 10  periodically use the SpamBayes tools to train from that. 
 11  """ 
 12   
 13  from functools import wraps 
 14  from lamson import queue 
 15  from spambayes import hammie, Options, storage 
 16  import os 
 17  import logging 
class Filter(object):
    """
    This code implements simple filtering and is taken from the
    SpamBayes documentation.

    Each public operation (filter, train_*, untrain_*) opens the hammie
    database, performs exactly one call on it and releases it again, so an
    instance never keeps a handle open between calls -- not even when the
    operation raises.
    """

    def __init__(self, storage_file, config):
        """
        Point SpamBayes at storage_file and merge in its config files.

        storage_file -- stored in the ("Storage", "persistent_storage_file")
                        SpamBayes option.
        config       -- path of a SpamBayes config file ("~" is expanded);
                        it is merged together with /etc/hammierc.

        Raises AssertionError when the merged options turn on
        ("Hammie", "train_on_filter").
        """
        options = Options.options
        options["Storage", "persistent_storage_file"] = storage_file
        options.merge_files(['/etc/hammierc', os.path.expanduser(config)])

        self.include_trained = Options.options["Headers", "include_trained"]
        self.dbname, self.usedb = storage.database_type([])

        # Nothing is open until open() is called.
        self.mode = None
        self.h = None

        assert not Options.options["Hammie", "train_on_filter"], "Cannot train_on_filter."

    def open(self, mode):
        """
        Open the hammie database in mode 'r' or 'c'.

        Raises AssertionError if a database is already open or the mode is
        not one of 'r' and 'c'.
        """
        assert not self.h, "Cannot reopen, close first."
        assert not self.mode, "Mode should be None on open, bad state."
        assert mode in ['r', 'c'], "Must give a valid mode: r, c."

        self.mode = mode
        self.h = hammie.open(self.dbname, self.usedb, self.mode)

    def close(self):
        """
        Close the database if one is open, calling store() first when it
        was opened in mode 'c'.  Does nothing when nothing is open.
        """
        if not self.h:
            return

        assert self.mode, "Mode was not set."
        assert self.mode in ['r','c'], "self.mode was not r or c. Bad state."

        self._release(persist=(self.mode == 'c'))

    def _release(self, persist):
        """Forget the open handle and close it, storing first if persist."""
        handle = self.h
        # Reset our state before touching the handle so that a failing
        # store()/close() cannot leave this object stuck "open".
        self.h = None
        self.mode = None
        try:
            if persist:
                handle.store()
        finally:
            handle.close()

    def _with_database(self, mode, operation):
        """
        Open the database in mode, return operation(handle) and always
        release the handle afterwards.
        """
        self.open(mode)
        try:
            result = operation(self.h)
        except BaseException:
            # The operation failed: give the handle back without store()
            # so a half-applied change is not persisted, then let the
            # original error propagate.
            self._release(persist=False)
            raise
        self.close()
        return result

    def filter(self, msg):
        """Run msg through the hammie filter and return its result."""
        return self._with_database('r', lambda db: db.filter(msg))

    def train_ham(self, msg):
        """Train the database with msg as ham."""
        self._with_database(
            'c', lambda db: db.train_ham(msg, self.include_trained))

    def train_spam(self, msg):
        """Train the database with msg as spam."""
        self._with_database(
            'c', lambda db: db.train_spam(msg, self.include_trained))

    def untrain_ham(self, msg):
        """Undo a previous ham training of msg."""
        self._with_database('c', lambda db: db.untrain_ham(msg))

    def untrain_spam(self, msg):
        """Undo a previous spam training of msg."""
        self._with_database('c', lambda db: db.untrain_spam(msg))
84
class spam_filter(object):
    """
    This is a decorator you attach to states that should be protected from spam.
    You use it by doing:

        @spam_filter(ham_db, rcfile, spam_dump_queue, next_state=SPAMMING)

    Where ham_db is the path to your hamdb configuration, rcfile is the
    SpamBayes config, and spam_dump_queue is where this filter should
    dump spam it detects.

    next_state has a default of None in the signature, but a state function
    must be supplied: construction fails with AssertionError without one.
    When a message is classified as spam the decorated handler is skipped
    and next_state is returned, so Lamson will transition that user into
    that state.  Use it to mark that address as a spammer and to ignore
    their emails or do something fancy with them.

    If the ham_db path does not exist the decorator logs a warning and
    passes every message straight through to the decorated handler.
    """

    def __init__(self, storage, config, spam_queue, next_state=None):
        """
        Remember the configuration and check that the storage path exists.

        Raises AssertionError when next_state is missing or falsy.
        """
        self.storage = storage
        self.config = config
        self.spam_queue = spam_queue
        self.next_state = next_state

        # Raised explicitly rather than with an assert statement so the
        # check still happens under "python -O".
        if not self.next_state:
            raise AssertionError("You must give next_state function.")

        self.functioning = os.path.exists(self.storage)
        if not self.functioning:
            logging.warning("SPAM filter for %r does not have a valid storage path, it'll still run but won't do anything.",
                            (self.storage, self.config, self.spam_queue,
                             self.next_state.__name__))

    def __call__(self, fn):
        """Wrap the state function fn with the spam check."""
        @wraps(fn)
        def category_wrapper(message, *args, **kw):
            if self.functioning and self.spam(message.to_message()):
                self.enqueue_as_spam(message.to_message())
                return self.next_state
            return fn(message, *args, **kw)

        return category_wrapper

    def spam(self, message):
        """Determines if the message is spam or not."""
        spfilter = Filter(self.storage, self.config)
        # The return value is not used; the classification header is read
        # from message itself after filtering.
        spfilter.filter(message)

        if 'X-Spambayes-Classification' in message:
            return message['X-Spambayes-Classification'].startswith('spam')
        return False

    def enqueue_as_spam(self, message):
        """Drops the message into the configured spam queue."""
        outq = queue.Queue(self.spam_queue)
        outq.push(str(message))
147