SQLite: make postponing transaction committing possible.

This should significantly improve performance when used to write large
amounts of messages.

This feature is enabled through the fsync configuration option.

Code refactorize around fsync.

This addresses #390 (although it doesn't necessarily fix all instances
of that problem yet).

Github-ref: https://github.com/OfflineIMAP/offlineimap/issues/390
Originally-written-by: Giel van Schijndel <me@mortis.eu>
Signed-off-by: Nicolas Sebrecht <nicolas.s-dev@laposte.net>
This commit is contained in:
Giel van Schijndel 2016-10-25 21:34:38 +02:00 committed by Nicolas Sebrecht
parent 41c9694488
commit b6ede627a9
5 changed files with 65 additions and 35 deletions

View File

@ -164,6 +164,9 @@ accounts = Test
# at the expense of greater risk of message duplication in the event of a system
# crash or power loss. Default is true. Set it to false to disable fsync.
#
# SQLite honors this option since v7.0.8+. However, those SQLite improvements
# are still EXPERIMENTAL.
#
#fsync = true

View File

@ -70,6 +70,7 @@ class BaseFolder(object):
self._sync_deletes = self.config.getdefaultboolean(
self.repoconfname, "sync_deletes", True)
self._dofsync = self.config.getdefaultboolean("general", "fsync", True)
# Determine if we're running static or dynamic folder filtering
# and check filtering status.
@ -103,6 +104,16 @@ class BaseFolder(object):
# fails if the str is utf-8
return self.name.decode('utf-8')
def __enter__(self):
"""Starts a transaction. This will postpone (guaranteed) saving to disk
of all messages saved inside this transaction until its committed."""
pass
def __exit__(self, exc_type, exc_val, exc_tb):
"""Commits a transaction, all messages saved inside this transaction
will only now be persisted to disk."""
pass
@property
def accountname(self):
"""Account name as string"""
@ -118,6 +129,9 @@ class BaseFolder(object):
else:
return self.repository.should_sync_folder(self.ffilter_name)
def dofsync(self):
return self._dofsync
def suggeststhreads(self):
"""Returns True if this folder suggests using threads for actions.
@ -891,38 +905,39 @@ class BaseFolder(object):
)
return
for num, uid in enumerate(copylist):
# Bail out on CTRL-C or SIGTERM.
if offlineimap.accounts.Account.abort_NOW_signal.is_set():
break
with self:
for num, uid in enumerate(copylist):
# Bail out on CTRL-C or SIGTERM.
if offlineimap.accounts.Account.abort_NOW_signal.is_set():
break
if uid == 0:
self.ui.warn("Assertion that UID != 0 failed; ignoring message.")
continue
if uid == 0:
self.ui.warn("Assertion that UID != 0 failed; ignoring message.")
continue
if uid > 0 and dstfolder.uidexists(uid):
# dstfolder has message with that UID already, only update status.
flags = self.getmessageflags(uid)
rtime = self.getmessagetime(uid)
statusfolder.savemessage(uid, None, flags, rtime)
continue
if uid > 0 and dstfolder.uidexists(uid):
# dstfolder has message with that UID already, only update status.
flags = self.getmessageflags(uid)
rtime = self.getmessagetime(uid)
statusfolder.savemessage(uid, None, flags, rtime)
continue
self.ui.copyingmessage(uid, num+1, num_to_copy, self, dstfolder)
# Exceptions are caught in copymessageto().
if self.suggeststhreads():
self.waitforthread()
thread = threadutil.InstanceLimitedThread(
self.getinstancelimitnamespace(),
target=self.copymessageto,
name="Copy message from %s:%s"% (self.repository, self),
args=(uid, dstfolder, statusfolder)
)
thread.start()
threads.append(thread)
else:
self.copymessageto(uid, dstfolder, statusfolder, register=0)
for thread in threads:
thread.join() # Block until all "copy" threads are done.
self.ui.copyingmessage(uid, num+1, num_to_copy, self, dstfolder)
# Exceptions are caught in copymessageto().
if self.suggeststhreads():
self.waitforthread()
thread = threadutil.InstanceLimitedThread(
self.getinstancelimitnamespace(),
target=self.copymessageto,
name="Copy message from %s:%s"% (self.repository, self),
args=(uid, dstfolder, statusfolder)
)
thread.start()
threads.append(thread)
else:
self.copymessageto(uid, dstfolder, statusfolder, register=0)
for thread in threads:
thread.join() # Block until all "copy" threads are done.
# Execute new mail hook if we have new mail.
if self.have_newmail:

View File

@ -92,6 +92,20 @@ class LocalStatusSQLiteFolder(BaseFolder):
LocalStatusSQLiteFolder.locks[self.filename] = DatabaseFileLock()
self._databaseFileLock = LocalStatusSQLiteFolder.locks[self.filename]
self._in_transactions = 0
def __enter__(self):
if not self.dofsync():
assert self.connection is not None
self._in_transactions += 1
def __exit__(self, exc_type, exc_val, exc_tb):
if not self.dofsync():
assert self._in_transactions > 0
self._in_transactions -= 1
if self._in_transactions < 1:
self.connection.commit()
def openfiles(self):
# Make sure sqlite is in multithreading SERIALIZE mode.
assert sqlite.threadsafety == 1, 'Your sqlite is not multithreading safe.'
@ -169,7 +183,8 @@ class LocalStatusSQLiteFolder(BaseFolder):
else:
self.connection.execute(sql, args)
success = True
self.connection.commit()
if not self._in_transactions:
self.connection.commit()
except sqlite.OperationalError as e:
if e.args[0] == 'cannot commit - no transaction is active':
pass

View File

@ -60,7 +60,6 @@ class MaildirFolder(BaseFolder):
def __init__(self, root, name, sep, repository):
self.sep = sep # needs to be set before super().__init__
super(MaildirFolder, self).__init__(name, repository)
self.dofsync = self.config.getdefaultboolean("general", "fsync", True)
self.root = root
# check if we should use a different infosep to support Win file systems
self.wincompatible = self.config.getdefaultboolean(
@ -330,7 +329,7 @@ class MaildirFolder(BaseFolder):
fd.write(content)
# Make sure the data hits the disk.
fd.flush()
if self.dofsync:
if self.dofsync():
os.fsync(fd)
fd.close()

View File

@ -34,7 +34,6 @@ class MappedIMAPFolder(IMAPFolder):
Instance variables (self.):
dryrun: boolean.
dofsync: boolean for fsync calls.
r2l: dict mapping message uids: self.r2l[remoteuid]=localuid
l2r: dict mapping message uids: self.r2l[localuid]=remoteuid
#TODO: what is the difference, how are they used?
@ -44,7 +43,6 @@ class MappedIMAPFolder(IMAPFolder):
def __init__(self, *args, **kwargs):
IMAPFolder.__init__(self, *args, **kwargs)
self.dryrun = self.config.getdefaultboolean("general", "dry-run", True)
self.dofsync = self.config.getdefaultboolean("general", "fsync", True)
self.maplock = Lock()
self.diskr2l, self.diskl2r = self._loadmaps()
self.r2l, self.l2r = None, None
@ -114,7 +112,7 @@ class MappedIMAPFolder(IMAPFolder):
with open(mapfilenametmp, 'wt') as mapfilefd:
for (key, value) in self.diskl2r.items():
mapfilefd.write("%d:%d\n"% (key, value))
if self.dofsync is True:
if self.dofsync():
fsync(mapfilefd)
# The lock is released when the file descriptor ends.
shutil.move(mapfilenametmp, mapfilename)