From cde94e5047d459bc728167d8de64dbe4ad0067b9 Mon Sep 17 00:00:00 2001 From: Mike Dawson Date: Sun, 16 Aug 2009 17:12:39 +0430 Subject: [PATCH] Patch for maxage and maxsize options Dear All, Attached is the patch that I have developed to provide maxage and maxsize options. You can thus sync only the last x days of messages and exclude large messages. All details in the attached git file. Regards, -Mike -- Attached file included as plaintext by Ecartis -- -- File: submit From 04fead2b46a79675a5b29de6f2b4088b9c9448e5 Mon Sep 17 00:00:00 2001 From: mike Date: Sun, 16 Aug 2009 17:00:49 +0430 Subject: [PATCH] Patch to provide maxage and maxsize account options to exclude old/large messages This is designed to make offlineimap even better for low bandwidth connections. maxage allows you to specify a number of days and only messages within that range will be considered by offlineimap for the sync. This can be useful if you would like to start using offlineimap with a large existing account and do not want to import large archives of mail. maxsize allows you to specify the maximum size of a message to consider so that you can exclude messages with large attachments etc. In both cases the cachemessagelist function of the folder was modified to ignore messages that do not meet the criteria. If the criteria are not specified then the existing code will be executed the same as before. If a message does not meet the criteria it will be as though this message does not exist - offlineimap will completely ignore it. It will not have flags updated, it will not be deleted, it will not be considered at all. When operating against an IMAP repository a server side search function is used. This of course requires support for server side search. I have tested this with either option, no options etc. against IMAP, Maildir and Gmail. I have run variations of this patch here for the last 3 weeks or so syncing about 4 accounts normally. 
--- offlineimap.conf | 24 +++++++++++++ offlineimap/accounts.py | 10 ++++-- offlineimap/folder/IMAP.py | 65 +++++++++++++++++++++++++++++------ offlineimap/folder/Maildir.py | 42 ++++++++++++++++++++++ 4 files changed, 129 insertions(+), 12 deletions(-) diff --git a/offlineimap.conf b/offlineimap.conf index b9aaecb..e6a19b3 100644 --- a/offlineimap.conf +++ b/offlineimap.conf @@ -195,6 +195,30 @@ remoterepository = RemoteExample # You can also specify parameters to the commands # presynchook = imapfilter -c someotherconfig.lua +# If you have a limited amount of bandwidth available you can exclude larger +# messages (e.g. those with large attachments etc). If you do this it +# will appear to offlineimap that these messages do not exist at all. They +# will not be copied, have flags changed etc. For this to work on an IMAP +# server the server must have server side search enabled. This works with gmail +# and most imap servers (e.g. cyrus etc) +# The maximum size should be specified in bytes - e.g. 2000000 for approx 2MB + +# maxsize = 2000000 + + +# When you are starting to sync an already existing account you can tell offlineimap +# to sync messages from only the last x days. When you do this messages older than x +# days will be completely ignored. This can be useful for importing existing accounts +# when you do not want to download large amounts of archive email. + +# Messages older than maxage days will not be synced, their flags will +# not be changed, they will not be deleted etc. For offlineimap it will be like these +# messages do not exist. This will perform an IMAP search in the case of IMAP or Gmail +# and therefore requires that the server support server side searching. This will +# calculate the earliest day that would be included in the search and include all +# messages from that day until today. e.g. 
maxage = 3 to sync only the last 3 days mail + +# maxage = 3 [Repository LocalExample] diff --git a/offlineimap/accounts.py b/offlineimap/accounts.py index f0f19e4..8ec3f54 100644 --- a/offlineimap/accounts.py +++ b/offlineimap/accounts.py @@ -182,16 +182,22 @@ class AccountSynchronizationMixin: # Connect to the local cache. self.statusrepos = offlineimap.repository.LocalStatus.LocalStatusRepository(self.getconf('localrepository'), self) - + + #might need changes here to ensure that one account sync does not crash others... if not self.refreshperiod: + self.sync(siglistener) self.ui.acctdone(self.name) + return + + looping = 1 while looping: self.sync(siglistener) looping = self.sleeper(siglistener) != 2 - self.ui.acctdone(self.name) + self.ui.acctdone(self.name) + def getaccountmeta(self): return os.path.join(self.metadatadir, 'Account-' + self.name) diff --git a/offlineimap/folder/IMAP.py b/offlineimap/folder/IMAP.py index d57f077..6e2bd2c 100644 --- a/offlineimap/folder/IMAP.py +++ b/offlineimap/folder/IMAP.py @@ -23,6 +23,7 @@ from offlineimap.version import versionstr import rfc822, time, string, random, binascii, re from StringIO import StringIO from copy import copy +import time class IMAPFolder(BaseFolder): @@ -115,6 +116,7 @@ class IMAPFolder(BaseFolder): return False + # TODO: Make this so that it can define a date that would be the oldest messages etc. def cachemessagelist(self): imapobj = self.imapserver.acquireconnection() self.messagelist = {} @@ -122,20 +124,63 @@ class IMAPFolder(BaseFolder): try: # Primes untagged_responses imapobj.select(self.getfullname(), readonly = 1, force = 1) - try: - # Some mail servers do not return an EXISTS response if - # the folder is empty. 
- maxmsgid = long(imapobj.untagged_responses['EXISTS'][0]) - except KeyError: - return - if maxmsgid < 1: - # No messages; return - return + maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1) + maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1) + + if (maxage != -1) | (maxsize != -1): + try: + search_condition = "("; + + if(maxage != -1): + #find out what the oldest message is that we should look at + oldest_time_struct = time.gmtime(time.time() - (60*60*24*maxage)) + + #format this manually - otherwise locales could cause problems + monthnames_standard = ["Jan", "Feb", "Mar", "Apr", "May", \ + "June", "July", "Aug", "Sep", "Oct", "Nov", "Dec"] + + our_monthname = monthnames_standard[oldest_time_struct[1]-1] + daystr = "%(day)02d" % {'day' : oldest_time_struct[2]} + date_search_str = "SINCE " + daystr + "-" + our_monthname \ + + "-" + str(oldest_time_struct[0]) + + search_condition += date_search_str + + if(maxsize != -1): + if(maxage != 1): #There are two conditions - add a space + search_condition += " " + + search_condition += "SMALLER " + self.config.getdefault("Account " + self.accountname, "maxsize", -1) + + search_condition += ")" + searchresult = imapobj.search(None, search_condition) + + #result would come back seperated by space - to change into a fetch + #statement we need to change space to comma + messagesToFetch = searchresult[1][0].replace(" ", ",") + except KeyError: + return + if len(messagesToFetch) < 1: + # No messages; return + return + else: + try: + # Some mail servers do not return an EXISTS response if + # the folder is empty. + + maxmsgid = long(imapobj.untagged_responses['EXISTS'][0]) + messagesToFetch = '1:%d' % maxmsgid; + except KeyError: + return + if maxmsgid < 1: + #no messages; return + return # Now, get the flags and UIDs for these. # We could conceivably get rid of maxmsgid and just say # '1:*' here. 
- response = imapobj.fetch('1:%d' % maxmsgid, '(FLAGS UID INTERNALDATE)')[1] + + response = imapobj.fetch(messagesToFetch, '(FLAGS UID INTERNALDATE)')[1] finally: self.imapserver.releaseconnection(imapobj) for messagestr in response: diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 811d759..49d4dae 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -29,6 +29,7 @@ except ImportError: uidmatchre = re.compile(',U=(\d+)') flagmatchre = re.compile(':.*2,([A-Z]+)') +timestampmatchre = re.compile('(\d+)'); timeseq = 0 lasttime = long(0) @@ -72,6 +73,28 @@ class MaildirFolder(BaseFolder): token.""" return 42 + #Checks to see if the given message is within the maximum age according + #to the maildir name which should begin with a timestamp + def _iswithinmaxage(self, messagename, maxage): + #In order to have the same behaviour as SINCE in an IMAP search + #we must convert this to the oldest time and then strip off hrs/mins + #from that day + oldest_time_utc = time.time() - (60*60*24*maxage) + oldest_time_struct = time.gmtime(oldest_time_utc) + oldest_time_today_seconds = ((oldest_time_struct[3] * 3600) \ + + (oldest_time_struct[4] * 60) \ + + oldest_time_struct[5]) + oldest_time_utc -= oldest_time_today_seconds + + timestampmatch = timestampmatchre.search(messagename) + timestampstr = timestampmatch.group() + timestamplong = long(timestampstr) + if(timestamplong < oldest_time_utc): + return False + else: + return True + + def _scanfolder(self): """Cache the message list. 
Maildir flags are: R (replied) @@ -92,6 +115,25 @@ class MaildirFolder(BaseFolder): filename in os.listdir(fulldirname)) for file in files: messagename = os.path.basename(file) + + #check if there is a parameter for maxage / maxsize - then see if this + #message should be considered or not + maxage = self.config.getdefaultint("Account " + self.accountname, "maxage", -1) + maxsize = self.config.getdefaultint("Account " + self.accountname, "maxsize", -1) + + if(maxage != -1): + isnewenough = self._iswithinmaxage(messagename, maxage) + if(isnewenough != True): + #this message is older than we should consider.... + continue + + #Check and see if the message is too big if the maxsize for this account is set + if(maxsize != -1): + filesize = os.path.getsize(file) + if(filesize > maxsize): + continue + + foldermatch = messagename.find(folderstr) != -1 if not foldermatch: # If there is no folder MD5 specified, or if it mismatches,