mirror of
https://github.com/djcb/mu.git
synced 2024-06-21 06:56:48 +02:00
mu: add '--lazy-check' option for indexing
Add an option --lazy-check to ignore any directories whose ctime has not changed since the last indexing operation. There are a few corner-cases (such as editing a message outside mu's control) where this might miss a change, but apart from that, it makes indexing a maildir (and its sub-maildirs) almost a no-op if there were no changes.
This commit is contained in:
parent
2a83b02ce2
commit
9477071e63
|
@ -89,14 +89,15 @@ mu_index_destroy (MuIndex *index)
|
|||
|
||||
|
||||
struct _MuIndexCallbackData {
|
||||
MuIndexMsgCallback _idx_msg_cb;
|
||||
MuIndexDirCallback _idx_dir_cb;
|
||||
MuStore* _store;
|
||||
void* _user_data;
|
||||
MuIndexStats* _stats;
|
||||
gboolean _reindex;
|
||||
time_t _dirstamp;
|
||||
guint _max_filesize;
|
||||
MuIndexMsgCallback _idx_msg_cb;
|
||||
MuIndexDirCallback _idx_dir_cb;
|
||||
MuStore* _store;
|
||||
void* _user_data;
|
||||
MuIndexStats* _stats;
|
||||
gboolean _reindex;
|
||||
gboolean _lazy_check;
|
||||
time_t _dirstamp;
|
||||
guint _max_filesize;
|
||||
};
|
||||
typedef struct _MuIndexCallbackData MuIndexCallbackData;
|
||||
|
||||
|
@ -216,30 +217,51 @@ on_run_maildir_msg (const char *fullpath, const char *mdir,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* return the ctime (st_ctime) of @path as reported by stat(2); if stat
 * fails, a warning with the errno description is logged and 0 is
 * returned -- note that a 0 result can therefore not be told apart from
 * a genuine timestamp by the caller
 */
static time_t
|
||||
get_dir_timestamp (const char *path)
|
||||
{
|
||||
struct stat statbuf;
|
||||
|
||||
if (stat (path, &statbuf) != 0) {
|
||||
g_warning ("failed to stat %s: %s",
|
||||
path, strerror(errno));
|
||||
return 0;
|
||||
}
|
||||
|
||||
return statbuf.st_ctime;
|
||||
}
|
||||
|
||||
static MuError
|
||||
on_run_maildir_dir (const char* fullpath, gboolean enter,
|
||||
MuIndexCallbackData *data)
|
||||
{
|
||||
GError *err;
|
||||
|
||||
err = NULL;
|
||||
|
||||
/* xapian stores a per-dir timestamp; we use this timestamp
|
||||
* to determine whether a message is up-to-data
|
||||
/* xapian stores a per-dir timestamp; we use this timestamp to determine
|
||||
* whether a message is up-to-date
|
||||
*/
|
||||
if (enter) {
|
||||
data->_dirstamp =
|
||||
mu_store_get_timestamp (data->_store, fullpath, &err);
|
||||
g_debug ("entering %s (ts==%u)",
|
||||
fullpath, (unsigned)data->_dirstamp);
|
||||
/* in 'lazy' mode, we only check the dir timestamp, and if it's
|
||||
* up to date, we don't bother with this dir. This fails to
|
||||
* account for messages below this dir that have merely
|
||||
* _changed_ though */
|
||||
if (data->_lazy_check && mu_maildir_is_leaf_dir(fullpath)) {
|
||||
time_t dirstamp;
|
||||
dirstamp = get_dir_timestamp (fullpath);
|
||||
if (dirstamp <= data->_dirstamp) {
|
||||
g_debug ("ignore %s (up-to-date)", fullpath);
|
||||
return MU_IGNORE;
|
||||
}
|
||||
}
|
||||
g_debug ("entering %s", fullpath);
|
||||
} else {
|
||||
time_t now;
|
||||
now = time (NULL);
|
||||
|
||||
mu_store_set_timestamp (data->_store, fullpath,
|
||||
now, &err);
|
||||
g_debug ("leaving %s (ts=%u)",
|
||||
fullpath, (unsigned)data->_dirstamp);
|
||||
time(NULL), &err);
|
||||
g_debug ("leaving %s", fullpath);
|
||||
}
|
||||
|
||||
if (data->_idx_dir_cb)
|
||||
|
@ -276,7 +298,8 @@ check_path (const char *path)
|
|||
|
||||
static void
|
||||
init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
|
||||
gboolean reindex, guint max_filesize, MuIndexStats *stats,
|
||||
gboolean reindex, gboolean lazycheck,
|
||||
guint max_filesize, MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
|
||||
void *user_data)
|
||||
{
|
||||
|
@ -286,9 +309,10 @@ init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
|
|||
cb_data->_user_data = user_data;
|
||||
cb_data->_store = xapian;
|
||||
|
||||
cb_data->_reindex = reindex;
|
||||
cb_data->_dirstamp = 0;
|
||||
cb_data->_max_filesize = max_filesize;
|
||||
cb_data->_reindex = reindex;
|
||||
cb_data->_lazy_check = lazycheck;
|
||||
cb_data->_dirstamp = 0;
|
||||
cb_data->_max_filesize = max_filesize;
|
||||
|
||||
cb_data->_stats = stats;
|
||||
if (cb_data->_stats)
|
||||
|
@ -318,7 +342,8 @@ mu_index_set_xbatch_size (MuIndex *index, guint xbatchsize)
|
|||
|
||||
MuError
|
||||
mu_index_run (MuIndex *index, const char *path,
|
||||
gboolean reindex, MuIndexStats *stats,
|
||||
gboolean reindex, gboolean lazycheck,
|
||||
MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
|
||||
void *user_data)
|
||||
{
|
||||
|
@ -336,7 +361,7 @@ mu_index_run (MuIndex *index, const char *path,
|
|||
return MU_ERROR;
|
||||
}
|
||||
|
||||
init_cb_data (&cb_data, index->_store, reindex,
|
||||
init_cb_data (&cb_data, index->_store, reindex, lazycheck,
|
||||
index->_max_filesize, stats,
|
||||
msg_cb, dir_cb, user_data);
|
||||
|
||||
|
@ -396,7 +421,7 @@ mu_index_stats (MuIndex *index, const char *path,
|
|||
cb_data._stats = stats;
|
||||
cb_data._user_data = user_data;
|
||||
|
||||
cb_data._dirstamp = 0;
|
||||
cb_data._dirstamp = 0;
|
||||
|
||||
return mu_maildir_walk (path,
|
||||
(MuMaildirWalkMsgCallback)on_stats_maildir_file,
|
||||
|
@ -404,10 +429,10 @@ mu_index_stats (MuIndex *index, const char *path,
|
|||
}
|
||||
|
||||
struct _CleanupData {
|
||||
MuStore *_store;
|
||||
MuIndexStats *_stats;
|
||||
MuIndexCleanupDeleteCallback _cb;
|
||||
void *_user_data;
|
||||
MuStore *_store;
|
||||
MuIndexStats *_stats;
|
||||
MuIndexCleanupDeleteCallback _cb;
|
||||
void *_user_data;
|
||||
|
||||
};
|
||||
typedef struct _CleanupData CleanupData;
|
||||
|
@ -440,8 +465,8 @@ mu_index_cleanup (MuIndex *index, MuIndexStats *stats,
|
|||
MuIndexCleanupDeleteCallback cb,
|
||||
void *user_data, GError **err)
|
||||
{
|
||||
MuError rv;
|
||||
CleanupData cudata;
|
||||
MuError rv;
|
||||
CleanupData cudata;
|
||||
|
||||
g_return_val_if_fail (index, MU_ERROR);
|
||||
|
||||
|
|
|
@ -119,6 +119,8 @@ typedef MuError (*MuIndexDirCallback) (const char* path, gboolean enter,
|
|||
* @param path the path to index. This must be an absolute path
|
||||
* @param force if != 0, force re-indexing already indexed messages; this is
|
||||
* obviously a lot slower than only indexing new/changed messages
|
||||
* @param lazycheck whether to ignore subdirectories that have up-to-date
|
||||
* timestamps.
|
||||
* @param stats a structure with some statistics about the results;
|
||||
* note that this function does *not* reset the struct values to allow
|
||||
* for cumulative stats from multiple calls. If needed, you can use
|
||||
|
@ -132,12 +134,13 @@ typedef MuError (*MuIndexDirCallback) (const char* path, gboolean enter,
|
|||
* case of some error.
|
||||
*/
|
||||
MuError mu_index_run (MuIndex *index, const char *path, gboolean force,
|
||||
MuIndexStats *stats, MuIndexMsgCallback msg_cb,
|
||||
gboolean lazycheck, MuIndexStats *stats,
|
||||
MuIndexMsgCallback msg_cb,
|
||||
MuIndexDirCallback dir_cb, void *user_data);
|
||||
|
||||
/**
|
||||
* gather some statistics about the Maildir; this is usually much faster
|
||||
* than mu_index_run, and can thus be used to provide some information to the user
|
||||
* gather some statistics about the Maildir; this is usually much faster than
|
||||
* mu_index_run, and can thus be used to provide some information to the user
|
||||
* note though that the statistics may be different from the reality that
|
||||
* mu_index_run sees, when there are updates in the Maildir
|
||||
*
|
||||
|
|
|
@ -248,21 +248,20 @@ process_file (const char* fullpath, const gchar* mdir,
|
|||
* determine if path is a maildir leaf-dir; ie. if it's 'cur' or 'new'
|
||||
* (we're skipping 'tmp' for obvious reasons)
|
||||
*/
|
||||
G_GNUC_CONST static gboolean
|
||||
is_maildir_new_or_cur (const char *path)
|
||||
gboolean
|
||||
mu_maildir_is_leaf_dir (const char *path)
|
||||
{
|
||||
size_t len;
|
||||
|
||||
g_return_val_if_fail (path, FALSE);
|
||||
|
||||
/* path is the full path; it cannot possibly be shorter
|
||||
* than 4 for a maildir (/cur or /new) */
|
||||
len = strlen (path);
|
||||
len = path ? strlen (path) : 0;
|
||||
if (G_UNLIKELY(len < 4))
|
||||
return FALSE;
|
||||
|
||||
/* optimization; one further idea would be cast the 4 bytes to an integer
|
||||
* and compare that -- need to think about alignment, endianness */
|
||||
/* optimization; one further idea would be cast the 4 bytes to an
|
||||
* integer and compare that -- need to think about alignment,
|
||||
* endianness */
|
||||
|
||||
if (path[len - 4] == G_DIR_SEPARATOR &&
|
||||
path[len - 3] == 'c' &&
|
||||
|
@ -415,7 +414,7 @@ process_dir_entry (const char* path, const char* mdir, struct dirent *entry,
|
|||
|
||||
switch (d_type) {
|
||||
case DT_REG: /* we only want files in cur/ and new/ */
|
||||
if (!is_maildir_new_or_cur (path))
|
||||
if (!mu_maildir_is_leaf_dir (path))
|
||||
return MU_OK;
|
||||
|
||||
return process_file (fullpath, mdir, cb_msg, data);
|
||||
|
@ -522,7 +521,7 @@ process_dir (const char* path, const char* mdir,
|
|||
gboolean full, void *data)
|
||||
{
|
||||
MuError result;
|
||||
DIR* dir;
|
||||
DIR* dir;
|
||||
|
||||
/* if it has a noindex file, we ignore this dir */
|
||||
if (dir_contains_file (path, MU_MAILDIR_NOINDEX_FILE) ||
|
||||
|
@ -531,27 +530,30 @@ process_dir (const char* path, const char* mdir,
|
|||
return MU_OK;
|
||||
}
|
||||
|
||||
if (dir_cb) {
|
||||
MuError rv;
|
||||
rv = dir_cb (path, TRUE/*enter*/, data);
|
||||
/* ignore this dir; not necessarily an _error_, dir might
|
||||
* be up-to-date and return MU_IGNORE */
|
||||
if (rv == MU_IGNORE)
|
||||
return MU_OK;
|
||||
else if (rv != MU_OK)
|
||||
return rv;
|
||||
}
|
||||
|
||||
dir = opendir (path);
|
||||
if (!dir) {
|
||||
g_warning ("cannot access %s: %s", path, strerror(errno));
|
||||
return MU_OK;
|
||||
}
|
||||
|
||||
if (dir_cb) {
|
||||
MuError rv;
|
||||
rv = dir_cb (path, TRUE, data);
|
||||
if (rv != MU_OK) {
|
||||
closedir (dir);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
result = process_dir_entries (dir, path, mdir, msg_cb, dir_cb, full, data);
|
||||
result = process_dir_entries (dir, path, mdir, msg_cb, dir_cb,
|
||||
full, data);
|
||||
closedir (dir);
|
||||
|
||||
/* only run dir_cb if it exists and so far, things went ok */
|
||||
if (dir_cb && result == MU_OK)
|
||||
return dir_cb (path, FALSE, data);
|
||||
return dir_cb (path, FALSE/*leave*/, data);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -798,15 +800,16 @@ mu_maildir_get_new_path (const char *oldpath, const char *new_mdir,
|
|||
if (new_name)
|
||||
mfile = get_new_basename ();
|
||||
else {
|
||||
/* determine the name of the mailfile, stripped of its flags, as well
|
||||
* as any custom (non-standard) flags */
|
||||
/* determine the name of the mailfile, stripped of its flags, as
|
||||
* well as any custom (non-standard) flags */
|
||||
char *cur;
|
||||
mfile = g_path_get_basename (oldpath);
|
||||
for (cur = &mfile[strlen(mfile)-1]; cur > mfile; --cur) {
|
||||
if ((*cur == ':' || *cur == '!') &&
|
||||
(cur[1] == '2' && cur[2] == ',')) {
|
||||
/* get the custom flags (if any) */
|
||||
custom_flags = mu_flags_custom_from_str (cur + 3);
|
||||
custom_flags =
|
||||
mu_flags_custom_from_str (cur + 3);
|
||||
cur[0] = '\0'; /* strip the flags */
|
||||
break;
|
||||
}
|
||||
|
@ -839,8 +842,6 @@ get_file_size (const char* path)
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static gboolean
|
||||
msg_move_check_pre (const gchar *src, const gchar *dst, GError **err)
|
||||
{
|
||||
|
|
|
@ -129,6 +129,15 @@ MuError mu_maildir_walk (const char *path, MuMaildirWalkMsgCallback cb_msg,
|
|||
gboolean mu_maildir_clear_links (const gchar* dir, GError **err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* whether the directory path ends in '/cur' or '/new'
|
||||
*
|
||||
* @param path some path
|
||||
*/
|
||||
gboolean mu_maildir_is_leaf_dir (const char *path);
|
||||
|
||||
|
||||
/**
|
||||
* get the Maildir flags from the full path of a mailfile. The flags
|
||||
* are as specified in http://cr.yp.to/proto/maildir.html, plus
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
|
||||
|
||||
/*
|
||||
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
** Copyright (C) 2008-2016 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This program is free software; you can redistribute it and/or modify it
|
||||
** under the terms of the GNU General Public License as published by the
|
||||
|
@ -49,24 +49,24 @@ sig_handler (int sig)
|
|||
"press again to kill immediately");
|
||||
}
|
||||
|
||||
MU_CAUGHT_SIGNAL = TRUE;
|
||||
MU_CAUGHT_SIGNAL = TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
install_sig_handler (void)
|
||||
{
|
||||
struct sigaction action;
|
||||
int i, sigs[] = { SIGINT, SIGHUP, SIGTERM };
|
||||
struct sigaction action;
|
||||
int i, sigs[] = { SIGINT, SIGHUP, SIGTERM };
|
||||
|
||||
MU_CAUGHT_SIGNAL = FALSE;
|
||||
MU_CAUGHT_SIGNAL = FALSE;
|
||||
|
||||
action.sa_handler = sig_handler;
|
||||
sigemptyset(&action.sa_mask);
|
||||
action.sa_flags = SA_RESETHAND;
|
||||
action.sa_handler = sig_handler;
|
||||
sigemptyset(&action.sa_mask);
|
||||
action.sa_flags = SA_RESETHAND;
|
||||
|
||||
for (i = 0; i != G_N_ELEMENTS(sigs); ++i)
|
||||
if (sigaction (sigs[i], &action, NULL) != 0)
|
||||
g_critical ("set sigaction for %d failed: %s",
|
||||
for (i = 0; i != G_N_ELEMENTS(sigs); ++i)
|
||||
if (sigaction (sigs[i], &action, NULL) != 0)
|
||||
g_critical ("set sigaction for %d failed: %s",
|
||||
sigs[i], strerror (errno));;
|
||||
}
|
||||
|
||||
|
@ -319,7 +319,8 @@ cmd_index (MuIndex *midx, MuConfig *opts, MuIndexStats *stats, GError **err)
|
|||
|
||||
newline_before_on();
|
||||
|
||||
rv = mu_index_run (midx, opts->maildir, opts->rebuild, stats,
|
||||
rv = mu_index_run (midx, opts->maildir, opts->rebuild,
|
||||
opts->lazycheck, stats,
|
||||
show_progress ?
|
||||
(MuIndexMsgCallback)index_msg_cb :
|
||||
(MuIndexMsgCallback)index_msg_silent_cb,
|
||||
|
|
|
@ -148,6 +148,8 @@ config_options_group_index (void)
|
|||
"top of the maildir", "<maildir>"},
|
||||
{"rebuild", 0, 0, G_OPTION_ARG_NONE, &MU_CONFIG.rebuild,
|
||||
"rebuild the database from scratch (false)", NULL},
|
||||
{"lazy-check", 0, 0, G_OPTION_ARG_NONE, &MU_CONFIG.lazycheck,
|
||||
"only check dir-timestamps (false)", NULL},
|
||||
{"my-address", 0, 0, G_OPTION_ARG_STRING_ARRAY,
|
||||
&MU_CONFIG.my_addresses,
|
||||
"my e-mail address (regexp); can be used multiple times",
|
||||
|
@ -307,7 +309,7 @@ config_options_group_script (void)
|
|||
GOptionEntry entries[] = {
|
||||
{G_OPTION_REMAINING, 0,0, G_OPTION_ARG_STRING_ARRAY,
|
||||
&MU_CONFIG.params, "script parameters", NULL},
|
||||
{NULL, 0, 0, 0, NULL, NULL, NULL}
|
||||
{NULL, 0, 0, 0, NULL, NULL, NULL}
|
||||
};
|
||||
|
||||
og = g_option_group_new("script", "Options for the 'script' command",
|
||||
|
|
|
@ -112,6 +112,8 @@ struct _MuConfig {
|
|||
gboolean rebuild; /* empty the database before indexing */
|
||||
gboolean autoupgrade; /* automatically upgrade db
|
||||
* when needed */
|
||||
gboolean lazycheck; /* don't check dirs with up-to-date
|
||||
* timestamps */
|
||||
int xbatchsize; /* batchsize for xapian
|
||||
* commits, or 0 for
|
||||
* default */
|
||||
|
|
Loading…
Reference in New Issue
Block a user