/* ** Copyright (C) 2008-2020 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify 1** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 3 of the License, or ** (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software Foundation, ** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ** */ #include "config.h" #include "mu-index.h" #include #include #include #include #include #include #include "mu-maildir.h" #define MU_LAST_USED_MAILDIR_KEY "last_used_maildir" #define MU_INDEX_MAX_FILE_SIZE (500*1000*1000) /* 500 Mb */ /* apparently, people are getting really big mails, so let us index those (by * default)*/ struct _MuIndex { MuStore *_store; gboolean _needs_reindex; guint _max_filesize; }; MuIndex* mu_index_new (MuStore *store, GError **err) { MuIndex *index; unsigned count; g_return_val_if_fail (store, NULL); g_return_val_if_fail (!mu_store_is_read_only(store), NULL); index = g_new0 (MuIndex, 1); index->_store = mu_store_ref (store); /* set the default max file size */ index->_max_filesize = MU_INDEX_MAX_FILE_SIZE; count = mu_store_count (store, err); if (count == (unsigned)-1) return NULL; else if (count == 0) index->_needs_reindex = TRUE; return index; } void mu_index_destroy (MuIndex *index) { if (!index) return; mu_store_unref (index->_store); g_free (index); } struct _MuIndexCallbackData { MuIndexMsgCallback _idx_msg_cb; MuIndexDirCallback _idx_dir_cb; MuStore* _store; void* _user_data; MuIndexStats* _stats; gboolean _reindex; gboolean _lazy_check; time_t _dirstamp; guint _max_filesize; }; typedef struct _MuIndexCallbackData MuIndexCallbackData; /* checks to determine if we need to (re)index this message note: * simply checking timestamps is not good enough because message may * be moved from other dirs (e.g. from 'new' to 'cur') and the time * stamps won't change. */ static inline gboolean needs_index (MuIndexCallbackData *data, const char *fullpath, time_t filestamp) { /* unconditionally reindex */ if (data->_reindex) return TRUE; /* it's not in the database yet */ if (!mu_store_contains_message (data->_store, fullpath)) return TRUE; /* it's there, but it's not up to date */ if ((unsigned)filestamp >= (unsigned)data->_dirstamp) return TRUE; return FALSE; /* index not needed */ } static MuError insert_or_update_maybe (const char *fullpath, const char *mdir, time_t filestamp, MuIndexCallbackData *data, gboolean *updated) { MuMsg *msg; GError *err; gboolean rv; *updated = FALSE; if (!needs_index (data, fullpath, filestamp)) return MU_OK; /* nothing to do for this one */ err = NULL; msg = mu_msg_new_from_file (fullpath, mdir, &err); if (!msg) { if (!err) g_warning ("error creating message object: %s", fullpath); else { g_warning ("%s", err->message); g_clear_error (&err); } /* warn, then simply continue */ return MU_OK; } /* we got a valid id; scan the message contents as well */ rv = mu_store_add_msg (data->_store, msg, &err); mu_msg_unref (msg); if (!rv) { g_warning ("error storing message object: %s", err ? err->message : "cause unknown"); g_clear_error (&err); return MU_ERROR; } *updated = TRUE; return MU_OK; } static MuError run_msg_callback_maybe (MuIndexCallbackData *data) { MuError result; if (!data || !data->_idx_msg_cb) return MU_OK; result = data->_idx_msg_cb (data->_stats, data->_user_data); if (G_UNLIKELY(result != MU_OK && result != MU_STOP)) g_warning ("error in callback"); return result; } static MuError on_run_maildir_msg (const char *fullpath, const char *mdir, struct stat *statbuf, MuIndexCallbackData *data) { MuError result; gboolean updated; /* protect against too big messages */ if (G_UNLIKELY(statbuf->st_size > data->_max_filesize)) { g_warning ("ignoring because bigger than %u bytes: %s", data->_max_filesize, fullpath); return MU_OK; /* not an error */ } result = run_msg_callback_maybe (data); if (result != MU_OK) return result; /* see if we need to update/insert anything... * use the ctime, so any status change will be visible (perms, * filename etc.)*/ result = insert_or_update_maybe (fullpath, mdir, statbuf->st_ctime, data, &updated); if (result == MU_OK && data && data->_stats) { /* update statistics */ ++data->_stats->_processed; updated ? ++data->_stats->_updated : ++data->_stats->_uptodate; } return result; } static time_t get_dir_timestamp (const char *path) { struct stat statbuf; if (stat (path, &statbuf) != 0) { g_warning ("failed to stat %s: %s", path, strerror(errno)); return 0; } return statbuf.st_ctime; } static MuError on_run_maildir_dir (const char* fullpath, gboolean enter, MuIndexCallbackData *data) { GError *err; err = NULL; /* xapian stores a per-dir timestamp; we use this timestamp to determine * whether a message is up-to-date */ if (enter) { data->_dirstamp = mu_store_get_dirstamp (data->_store, fullpath, &err); /* in 'lazy' mode, we only check the dir timestamp, and if it's * up to date, we don't bother with this dir. This fails to * account for messages below this dir that have merely * _changed_ though */ if (data->_lazy_check && mu_maildir_is_leaf_dir(fullpath)) { time_t dirstamp; dirstamp = get_dir_timestamp (fullpath); if (dirstamp <= data->_dirstamp) { g_debug ("ignore %s (up-to-date)", fullpath); return MU_IGNORE; } } g_debug ("entering %s", fullpath); } else { mu_store_set_dirstamp (data->_store, fullpath, time(NULL), &err); g_debug ("leaving %s", fullpath); } if (data->_idx_dir_cb) return data->_idx_dir_cb (fullpath, enter, data->_user_data); if (err) { g_warning("%s: error handling %s: %s", __func__, fullpath, err->message); g_clear_error(&err); } return MU_OK; } static gboolean check_path (const char *path) { g_return_val_if_fail (path, FALSE); if (!g_path_is_absolute (path)) { g_warning ("%s: not an absolute path: '%s'", __func__, path); return FALSE; } if (access (path, R_OK) != 0) { g_warning ("%s: cannot open '%s': %s", __func__, path, strerror (errno)); return FALSE; } return TRUE; } static void init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian, gboolean reindex, gboolean lazycheck, guint max_filesize, MuIndexStats *stats, MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb, void *user_data) { cb_data->_idx_msg_cb = msg_cb; cb_data->_idx_dir_cb = dir_cb; cb_data->_user_data = user_data; cb_data->_store = xapian; cb_data->_reindex = reindex; cb_data->_lazy_check = lazycheck; cb_data->_dirstamp = 0; cb_data->_max_filesize = max_filesize; cb_data->_stats = stats; if (cb_data->_stats) memset (cb_data->_stats, 0, sizeof(MuIndexStats)); } void mu_index_set_max_msg_size (MuIndex *index, guint max_size) { g_return_if_fail (index); if (max_size == 0) index->_max_filesize = MU_INDEX_MAX_FILE_SIZE; else index->_max_filesize = max_size; } MuError mu_index_run (MuIndex *index, gboolean reindex, gboolean lazycheck, MuIndexStats *stats, MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb, void *user_data) { MuIndexCallbackData cb_data; MuError rv; const char *path; g_return_val_if_fail (index && index->_store, MU_ERROR); g_return_val_if_fail (msg_cb, MU_ERROR); path = mu_store_root_maildir (index->_store); if (!check_path (path)) return MU_ERROR; if (index->_needs_reindex) reindex = TRUE; init_cb_data (&cb_data, index->_store, reindex, lazycheck, index->_max_filesize, stats, msg_cb, dir_cb, user_data); rv = mu_maildir_walk (path, (MuMaildirWalkMsgCallback)on_run_maildir_msg, (MuMaildirWalkDirCallback)on_run_maildir_dir, reindex, /* re-index, ie. do a full update */ &cb_data); mu_store_flush (index->_store); return rv; } static MuError on_stats_maildir_file (const char *fullpath, const char *mdir, struct stat *statbuf, MuIndexCallbackData *cb_data) { MuError result; if (cb_data && cb_data->_idx_msg_cb) result = cb_data->_idx_msg_cb (cb_data->_stats, cb_data->_user_data); else result = MU_OK; if (result == MU_OK) { if (cb_data->_stats) ++cb_data->_stats->_processed; return MU_OK; } return result; /* MU_STOP or MU_OK */ } MuError mu_index_stats (MuIndex *index, MuIndexStats *stats, MuIndexMsgCallback cb_msg, MuIndexDirCallback cb_dir, void *user_data) { const char *path; MuIndexCallbackData cb_data; g_return_val_if_fail (index, MU_ERROR); g_return_val_if_fail (cb_msg, MU_ERROR); path = mu_store_root_maildir (index->_store); if (!check_path (path)) return MU_ERROR; if (stats) memset (stats, 0, sizeof(MuIndexStats)); cb_data._idx_msg_cb = cb_msg; cb_data._idx_dir_cb = cb_dir; cb_data._stats = stats; cb_data._user_data = user_data; cb_data._dirstamp = 0; return mu_maildir_walk (path, (MuMaildirWalkMsgCallback)on_stats_maildir_file, NULL, FALSE, &cb_data); } struct _CleanupData { MuStore *_store; MuIndexStats *_stats; MuIndexCleanupDeleteCallback _cb; void *_user_data; }; typedef struct _CleanupData CleanupData; static MuError foreach_doc_cb (const char* path, CleanupData *cudata) { if (access (path, R_OK) != 0) { if (errno != EACCES) g_debug ("cannot access %s: %s", path, strerror(errno)); if (!mu_store_remove_path (cudata->_store, path)) return MU_ERROR; /* something went wrong... bail out */ if (cudata->_stats) ++cudata->_stats->_cleaned_up; } if (cudata->_stats) ++cudata->_stats->_processed; if (!cudata->_cb) return MU_OK; return cudata->_cb (cudata->_stats, cudata->_user_data); } MuError mu_index_cleanup (MuIndex *index, MuIndexStats *stats, MuIndexCleanupDeleteCallback cb, void *user_data, GError **err) { MuError rv; CleanupData cudata; g_return_val_if_fail (index, MU_ERROR); cudata._store = index->_store; cudata._stats = stats; cudata._cb = cb; cudata._user_data = user_data; rv = mu_store_foreach (index->_store, (MuStoreForeachFunc)foreach_doc_cb, &cudata, err); mu_store_flush (index->_store); return rv; } gboolean mu_index_stats_clear (MuIndexStats *stats) { if (!stats) return FALSE; memset (stats, 0, sizeof(MuIndexStats)); return TRUE; }