From 8193cc3e4ccd322bc0b7e3439b778564f61f62ea Mon Sep 17 00:00:00 2001 From: djcb Date: Sun, 28 Apr 2019 13:57:07 +0300 Subject: [PATCH] contacts: rework: install in database Rewrite the contacts-cache backend in c++ Store the contacts as metadata in the xapian database, rather than in a separate file. Update the Store to deal with this. --- lib/Makefile.am | 3 +- lib/mu-contacts.c | 522 ------------------------------------------ lib/mu-contacts.cc | 282 +++++++++++++++++++++++ lib/mu-contacts.h | 96 ++------ lib/mu-contacts.hh | 155 +++++++++++++ lib/mu-store-priv.hh | 79 +++---- lib/mu-store-read.cc | 13 ++ lib/mu-store-write.cc | 37 ++- lib/mu-store.h | 11 + 9 files changed, 525 insertions(+), 673 deletions(-) delete mode 100644 lib/mu-contacts.c create mode 100644 lib/mu-contacts.cc create mode 100644 lib/mu-contacts.hh diff --git a/lib/Makefile.am b/lib/Makefile.am index 4a2040fd..d42ac51a 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -63,7 +63,8 @@ noinst_LTLIBRARIES= \ libmu_la_SOURCES= \ mu-bookmarks.c \ mu-bookmarks.h \ - mu-contacts.c \ + mu-contacts.cc \ + mu-contacts.hh \ mu-contacts.h \ mu-container.c \ mu-container.h \ diff --git a/lib/mu-contacts.c b/lib/mu-contacts.c deleted file mode 100644 index 7df30361..00000000 --- a/lib/mu-contacts.c +++ /dev/null @@ -1,522 +0,0 @@ -/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/ -/* -** Copyright (C) 2008-2016 Dirk-Jan C. Binnema -** -** This program is free software; you can redistribute it and/or modify it -** under the terms of the GNU General Public License as published by the -** Free Software Foundation; either version 3, or (at your option) any -** later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. 
-** -** You should have received a copy of the GNU General Public License -** along with this program; if not, write to the Free Software Foundation, -** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -** -*/ - -#include -#include -#include -#include - -#include "mu-contacts.h" -#include "mu-util.h" -#include "mu-str.h" - -#define EMAIL_KEY "email" -#define NAME_KEY "name" -#define TSTAMP_KEY "tstamp" -#define PERSONAL_KEY "personal" -#define FREQ_KEY "frequency" - -/* note: 'personal' here means a mail where my e-mail addresses is explicitly - * in one of the address fields, ie., it's not some mailing list message */ -struct _ContactInfo { - gchar *_name, *_email; - gboolean _personal; - time_t _tstamp; - unsigned _freq; -}; -typedef struct _ContactInfo ContactInfo; - -static void contact_info_destroy (ContactInfo *cinfo); -static ContactInfo *contact_info_new (char *email, char *name, - gboolean personal, time_t tstamp, unsigned freq); - -struct _MuContacts { - GKeyFile *_ccache; - gchar *_path; - - GHashTable *_hash; - gboolean _dirty; -}; - -static GKeyFile* -load_key_file (const char *path) -{ - GError *err; - GKeyFile *keyfile; - gboolean file_exists; - - err = NULL; - - /* of course this is racy, but it's only for giving more - * meaningful errors to users */ - file_exists = TRUE; - if (access(path, F_OK) != 0) { - if (errno != ENOENT) { - g_warning ("cannot open %s: %s", path, strerror(errno)); - return NULL; - } - file_exists = FALSE; - } - - err = NULL; - keyfile = g_key_file_new (); - - if (file_exists && !g_key_file_load_from_file - (keyfile, path, G_KEY_FILE_KEEP_COMMENTS, &err)) { - g_warning ("could not load keyfile %s: %s", path, err->message); - g_error_free (err); - g_key_file_free (keyfile); - return NULL; - } - return keyfile; -} - - -static gboolean -get_values (GKeyFile *kfile, const gchar *group, - gchar **email, gchar **name, gboolean *personal, size_t *tstamp, - unsigned *freq) -{ - GError *err; - err = NULL; - - do { - 
int i; - - *email = g_key_file_get_value (kfile, group, EMAIL_KEY, &err); - if (!*email) - break; - - *tstamp = (time_t)g_key_file_get_integer (kfile, group, - TSTAMP_KEY, &err); - if (err) - break; - *personal = g_key_file_get_boolean (kfile, group, - PERSONAL_KEY, NULL); - *name = g_key_file_get_value (kfile, group, NAME_KEY, NULL); - - i = g_key_file_get_integer (kfile, group, FREQ_KEY, NULL); - *freq = (unsigned)(i >= 0) ? i : 1; - - return TRUE; - - } while (0); - - g_warning ("error getting value for %s: %s", - group, err->message ? err->message: "error"); - g_clear_error (&err); - - return FALSE; -} - - -static gboolean -deserialize_cache (MuContacts *self) -{ - gchar **groups; - gsize i, len; - - groups = g_key_file_get_groups (self->_ccache, &len); - for (i = 0; i != len; ++i) { - ContactInfo *cinfo; - char *name, *email; - size_t tstamp; - gboolean personal; - unsigned freq; - if (!get_values (self->_ccache, groups[i], - &email, &name, &personal, &tstamp, &freq)) - continue; /* ignore this one... 
*/ - - cinfo = contact_info_new (email, name, personal, tstamp, freq); - - /* note, we're using the groups[i], so don't free with g_strfreev */ - g_hash_table_insert (self->_hash, groups[i], - cinfo); - } - - g_free (groups); - return TRUE; -} - -static gboolean -set_comment (GKeyFile *kfile) -{ - GError *err; - const char *comment = - " automatically generated -- do not edit"; - - err = NULL; - if (!g_key_file_set_comment (kfile, NULL, NULL, comment, &err)) { - g_warning ("could not write comment to keyfile: %s", - err->message); - g_error_free (err); - return FALSE; - } - - return TRUE; -} - - -MuContacts* -mu_contacts_new (const gchar *path) -{ - MuContacts *self; - - g_return_val_if_fail (path, NULL); - self = g_new0 (MuContacts, 1); - - self->_path = g_strdup (path); - self->_hash = g_hash_table_new_full - (g_str_hash, g_str_equal, g_free, - (GDestroyNotify)contact_info_destroy); - - self->_ccache = load_key_file (path); - if (!self->_ccache || !set_comment (self->_ccache)) { - mu_contacts_destroy (self); - return NULL; - } - deserialize_cache (self); - MU_WRITE_LOG("deserialized contacts from cache %s", - path); - - self->_dirty = FALSE; - return self; -} - - -void -mu_contacts_clear (MuContacts *self) -{ - g_return_if_fail (self); - - if (self->_ccache) - g_key_file_free (self->_ccache); - - g_hash_table_remove_all (self->_hash); - - self->_ccache = g_key_file_new (); - self->_dirty = FALSE; -} - - -/* - * we use the e-mail address to create a key in the GKeyFile, but we - * have to mutilate a bit so that it's (a) *cough* practically-unique - * and (b) valid as a GKeyFile group name (ie., valid utf8, no control - * chars, no '[' or ']') - */ -static const char* -encode_email_address (const char *addr) -{ - static char enc[254 + 1]; /* max size for an e-mail addr */ - char *cur; - - if (!addr) - return FALSE; - - cur = strncpy(enc, addr, sizeof(enc) - 1); - cur[sizeof(enc) - 1] = '\0'; - - /* make sure chars are with {' ' .. 
'~'}, and not '[' ']' */ - for (; *cur != '\0'; ++cur) - if (!isalnum(*cur)) { - *cur = 'A' + (*cur % ('Z' - 'A')); - } else - *cur = tolower(*cur); - - return enc; -} - - -/* downcase the domain-part of the email address, but only if it - * consists of ascii (to prevent screwing up idna addresses) - */ -static char* -downcase_domain_maybe (const char *addr) -{ - char *addr_conv, *at, *cur; - - addr_conv = g_strdup (addr); - - if (!(at = strchr (addr_conv, '@'))) { /*huh?*/ - g_free (addr_conv); - return NULL; - } - - for (cur = at + 1; *cur; ++cur) { - if (isascii(*cur)) - *cur = g_ascii_tolower (*cur); - else { /* non-ascii; return the unchanged original */ - g_free (addr_conv); - return g_strdup (addr); - } - - } - - return addr_conv; -} - -static void -clear_str (char* str) -{ - if (str) { - mu_str_remove_ctrl_in_place (str); - g_strstrip (str); - } -} - - -gboolean -mu_contacts_add (MuContacts *self, const char *addr, const char *name, - gboolean personal, time_t tstamp) -{ - ContactInfo *cinfo; - const char *group; - - g_return_val_if_fail (self, FALSE); - g_return_val_if_fail (addr, FALSE); - - group = encode_email_address (addr); - - cinfo = (ContactInfo*) g_hash_table_lookup (self->_hash, group); - if (!cinfo) { - char *addr_dc; - if (!(addr_dc = downcase_domain_maybe (addr))) - return FALSE; - cinfo = contact_info_new (addr_dc, - name ? 
g_strdup(name) : NULL, personal, - tstamp, 1); - g_hash_table_insert (self->_hash, g_strdup(group), cinfo); - } else { - /* if the contact is ever used in a personal way, it's - * personal */ - if (personal) - cinfo->_personal = TRUE; - - if (cinfo->_tstamp < tstamp) { - if (!mu_str_is_empty(name)) { - /* update the name to the last one used, unless it's - * empty*/ - g_free (cinfo->_name); - cinfo->_name = g_strdup (name); - if (cinfo->_name) - mu_str_remove_ctrl_in_place (cinfo->_name); - } - cinfo->_tstamp = tstamp; - } - ++cinfo->_freq; - } - - - self->_dirty = TRUE; - - return TRUE; -} - -struct _EachContactData { - MuContactsForeachFunc _func; - gpointer _user_data; - GRegex *_rx; - size_t _num; -}; -typedef struct _EachContactData EachContactData; - -static void /* email will never be NULL, but ci->_name may be */ -each_contact (const char *group, ContactInfo *ci, EachContactData *ecdata) -{ - if (!ci->_email) - g_warning ("missing email: %u", (unsigned)ci->_tstamp); - - /* ignore this contact if we have a regexp, and it matches - * neither email nor name (if we have a name) */ - while (ecdata->_rx) { /* note, only once */ - if (g_regex_match (ecdata->_rx, ci->_email, 0, NULL)) - break; /* email matches? continue! */ - if (!ci->_name) - return; /* email did not match, no name? ignore this one */ - if (g_regex_match (ecdata->_rx, ci->_name, 0, NULL)) - break; /* name matches? continue! 
*/ - return; /* nothing matched, ignore this one */ - } - - ecdata->_func (ci->_email, ci->_name, ci->_personal, - ci->_tstamp, ci->_freq, ecdata->_user_data); - - ++ecdata->_num; -} - -gboolean -mu_contacts_foreach (MuContacts *self, MuContactsForeachFunc func, - gpointer user_data, const char *pattern, size_t *num) -{ - EachContactData ecdata; - - g_return_val_if_fail (self, FALSE); - g_return_val_if_fail (func, FALSE); - - if (pattern) { - GError *err; - err = NULL; - ecdata._rx = g_regex_new - (pattern, G_REGEX_CASELESS|G_REGEX_OPTIMIZE, - 0, &err); - if (!ecdata._rx) { - g_warning ("error in regexp '%s': %s", - pattern, err->message); - g_error_free (err); - return FALSE; - } - } else - ecdata._rx = NULL; - - ecdata._func = func; - ecdata._user_data = user_data; - ecdata._num = 0; - - g_hash_table_foreach (self->_hash, - (GHFunc)each_contact, - &ecdata); - - if (ecdata._rx) - g_regex_unref (ecdata._rx); - - if (num) - *num = ecdata._num; - - return TRUE; -} - -static void -each_keyval (const char *group, ContactInfo *cinfo, MuContacts *self) -{ - /* use set value so the string do not necessarily have to be - * valid utf-8 */ - if (cinfo->_name) - g_key_file_set_value (self->_ccache, group, NAME_KEY, - cinfo->_name); - - g_key_file_set_value (self->_ccache, group, EMAIL_KEY, - cinfo->_email); - g_key_file_set_boolean (self->_ccache, group, PERSONAL_KEY, - cinfo->_personal); - g_key_file_set_integer (self->_ccache, group, TSTAMP_KEY, - (int)cinfo->_tstamp); - g_key_file_set_integer (self->_ccache, group, FREQ_KEY, - (int)cinfo->_freq); -} - -gboolean -mu_contacts_serialize (MuContacts *self) -{ - gchar *data; - gsize len; - gboolean rv; - - g_return_val_if_fail (self, FALSE); - - g_hash_table_foreach (self->_hash, (GHFunc)each_keyval, self); - - /* Note: err arg is unused */ - data = g_key_file_to_data (self->_ccache, &len, NULL); - if (len) { - GError *err; - err = NULL; - rv = g_file_set_contents (self->_path, data, len, &err); - if (!rv) { - g_warning 
("failed to serialize cache to %s: %s", - self->_path, err->message); - g_error_free (err); - } - g_free (data); - } else - rv = TRUE; - - return rv; -} - -void -mu_contacts_destroy (MuContacts *self) -{ - if (!self) - return; - - if (self->_ccache && self->_dirty && - mu_contacts_serialize (self)) - MU_WRITE_LOG("serialized contacts cache %s", - self->_path); - - if (self->_ccache) - g_key_file_free (self->_ccache); - - g_free (self->_path); - - if (self->_hash) - g_hash_table_destroy (self->_hash); - - g_free (self); -} - -/* note, we will *own* the name, email we get, and we'll free them in the - * end... */ -static ContactInfo * -contact_info_new (char *email, char *name, gboolean personal, time_t tstamp, - unsigned freq) -{ - ContactInfo *cinfo; - - /* email should not be NULL, name can */ - g_return_val_if_fail (email, NULL); - - cinfo = g_slice_new (ContactInfo); - - /* we need to clear the strings from control chars because - * they could screw up the keyfile */ - clear_str (email); - clear_str (name); - - cinfo->_email = email; - cinfo->_name = name; - cinfo->_personal = personal; - cinfo->_tstamp = tstamp; - cinfo->_freq = freq; - - return cinfo; -} - -static void -contact_info_destroy (ContactInfo *cinfo) -{ - if (!cinfo) - return; - - g_free (cinfo->_email); - g_free (cinfo->_name); - - g_slice_free (ContactInfo, cinfo); -} - - -size_t -mu_contacts_count (MuContacts *self) -{ - g_return_val_if_fail (self, 0); - - return g_hash_table_size (self->_hash); -} diff --git a/lib/mu-contacts.cc b/lib/mu-contacts.cc new file mode 100644 index 00000000..3936ae57 --- /dev/null +++ b/lib/mu-contacts.cc @@ -0,0 +1,282 @@ +/* +** Copyright (C) 2019 Dirk-Jan C. Binnema +** +** This program is free software; you can redistribute it and/or modify it +** under the terms of the GNU General Public License as published by the +** Free Software Foundation; either version 3, or (at your option) any +** later version. 
+** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software Foundation, +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** +*/ + +#include "mu-contacts.hh" + +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace Mu; + +ContactInfo::ContactInfo (const std::string& _full_address, + const std::string& _email, + const std::string& _name, + bool _personal, time_t _last_seen, size_t _freq): + full_address{_full_address}, + email{_email}, + name{_name}, + personal{_personal}, + last_seen{_last_seen}, + freq{_freq}, + tstamp{g_get_monotonic_time()} {} + + +struct EmailHash { + std::size_t operator()(const std::string& email) const { + std::size_t djb = 5381; // djb hash + for (const auto c : email) + djb = ((djb << 5) + djb) + g_ascii_tolower(c); + return djb; + } +}; + +struct EmailEqual { + bool operator()(const std::string& email1, const std::string& email2) const { + return g_ascii_strcasecmp(email1.c_str(), email2.c_str()) == 0; + } +}; + +struct ContactInfoHash { + std::size_t operator()(const ContactInfo& ci) const { + std::size_t djb = 5381; // djb hash + for (const auto c : ci.email) + djb = ((djb << 5) + djb) + g_ascii_tolower(c); + return djb; + } +}; + +struct ContactInfoEqual { + bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const { + return g_ascii_strcasecmp(ci1.email.c_str(), ci2.email.c_str()) == 0; + } +}; + +struct ContactInfoLessThan { + bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const { + + if (ci1.personal != ci2.personal) + return ci1.personal; // personal comes first + + if (ci1.last_seen != 
ci2.last_seen) // more recent comes first + return ci1.last_seen > ci2.last_seen; + + if (ci1.freq != ci2.freq) // more frequent comes first + return ci1.freq > ci2.freq; + + return g_ascii_strcasecmp(ci1.email.c_str(), ci2.email.c_str()) < 0; + } +}; + +using ContactUMap = std::unordered_map; +//using ContactUSet = std::unordered_set; +using ContactSet = std::set, ContactInfoLessThan>; + +struct Contacts::Private { + Private(const std::string& serialized): + contacts_{deserialize(serialized)} + {} + + ContactUMap deserialize(const std::string&) const; + std::string serialize() const; + + ContactUMap contacts_; + std::mutex mtx_; +}; + +constexpr auto Separator = "\xff"; // Invalid in UTF-8 + +ContactUMap +Contacts::Private::deserialize(const std::string& serialized) const +{ + ContactUMap contacts; + std::stringstream ss{serialized, std::ios_base::in}; + std::string line; + + while (getline (ss, line)) { + + const auto parts = Mux::split (line, Separator); + if (G_UNLIKELY(parts.size() != 6)) { + g_warning ("error: '%s'", line.c_str()); + continue; + } + + ContactInfo ci(std::move(parts[0]), // full address + parts[1], // email + std::move(parts[2]), // name + parts[3][0] == '1' ? true : false, // personal + (time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // last_seen + (std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq + + contacts.emplace(std::move(parts[1]), std::move(ci)); + + } + + return contacts; +} + + +Contacts::Contacts (const std::string& serialized) : + priv_{std::make_unique(serialized)} +{} + +Contacts::~Contacts() = default; + +std::string +Contacts::serialize() const +{ + std::lock_guard l_{priv_->mtx_}; + std::string s; + + for (const auto& item: priv_->contacts_) { + const auto& ci{item.second}; + s += Mux::format("%s%s" + "%s%s" + "%s%s" + "%d%s" + "%" G_GINT64_FORMAT "%s" + "%" G_GINT64_FORMAT "\n", + ci.full_address.c_str(), Separator, + ci.email.c_str(), Separator, + ci.name.c_str(), Separator, + ci.personal ? 
1 : 0, Separator, + (gint64)ci.last_seen, Separator, + (gint64)ci.freq); + } + + return s; +} + +void +Contacts::add (ContactInfo&& ci) +{ + std::lock_guard l_{priv_->mtx_}; + + auto down = g_ascii_strdown (ci.email.c_str(), -1); + std::string email{down}; + g_free(down); + + auto it = priv_->contacts_.find(email); + if (it != priv_->contacts_.end()) { + auto& ci2 = it->second; + ++ci2.freq; + if (ci.last_seen > ci2.last_seen) { + ci2.last_seen = ci.last_seen; + ci2.email = std::move(ci.email); + if (!ci.name.empty()) + ci2.name = std::move(ci.name); + } + } else { + priv_->contacts_.emplace(std::move(email), std::move(ci)); + } +} + + +const ContactInfo* +Contacts::_find (const std::string& email) const +{ + std::lock_guard l_{priv_->mtx_}; + + ContactInfo ci{"", email, "", false, 0}; + const auto it = priv_->contacts_.find(ci.email); + if (it == priv_->contacts_.end()) + return {}; + else + return &it->second; +} + + +void +Contacts::clear() +{ + std::lock_guard l_{priv_->mtx_}; + + priv_->contacts_.clear(); +} + + +std::size_t +Contacts::size() const +{ + std::lock_guard l_{priv_->mtx_}; + + return priv_->contacts_.size(); +} + + +void +Contacts::for_each(const EachContactFunc& each_contact) const +{ + std::lock_guard l_{priv_->mtx_}; + + if (!each_contact) + return; // nothing to do + + // first sort them for 'rank' + ContactSet sorted; + for (const auto& item: priv_->contacts_) + sorted.emplace(item.second); + + for (const auto& ci: sorted) + each_contact (ci); +} + +/// C binding + +size_t +mu_contacts_count (MuContacts *self) +{ + g_return_val_if_fail (self, 0); + + auto myself = reinterpret_cast(self); + + return myself->size(); +} + +gboolean +mu_contacts_foreach (MuContacts *self, MuContactsForeachFunc func, + gpointer user_data) +{ + g_return_val_if_fail (self, FALSE); + g_return_val_if_fail (func, FALSE); + + auto myself = reinterpret_cast(self); + + myself->for_each([&](const ContactInfo& ci) { + g_return_if_fail (!ci.email.empty()); + 
func(ci.full_address.c_str(), + ci.email.c_str(), + ci.name.empty() ? NULL : ci.name.c_str(), + ci.personal, + ci.last_seen, + ci.freq, + ci.tstamp, + user_data); + }); + + return TRUE; +} + +struct _MuContacts : public Mu::Contacts {}; /**< c-compat */ diff --git a/lib/mu-contacts.h b/lib/mu-contacts.h index 68c74d0b..62a946eb 100644 --- a/lib/mu-contacts.h +++ b/lib/mu-contacts.h @@ -1,5 +1,3 @@ -/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/ - /* ** Copyright (C) 2012-2016 Dirk-Jan C. Binnema ** @@ -30,60 +28,6 @@ G_BEGIN_DECLS struct _MuContacts; typedef struct _MuContacts MuContacts; -/** - * create a new MuContacts object; use mu_contacts_destroy when you no longer need it - * - * @param ccachefile full path to the file with cached list of contacts - * - * @return a new MuContacts* if succeeded, NULL otherwise - */ -MuContacts* mu_contacts_new (const gchar *ccachefile) - G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; - - -/** - * add a contacts; if there's a contact with this e-mail address - * already, it will not updated unless the timestamp of this one is - * higher and has a non-empty name - * - * @param contacts a contacts object - * @param email e-mail address of the contact (not NULL) - * @param name name of the contact (or NULL) - * @param personal whether the contact is 'personal' (ie., my address - * appears in one of the address fields) - * @param tstamp timestamp for this address - * - * @return TRUE if succeeded, FALSE otherwise - */ -gboolean mu_contacts_add (MuContacts *self, const char *email, - const char* name, gboolean personal, time_t tstamp); - -/** - * destroy the Contacts object - * - * @param contacts a contacts object - */ -void mu_contacts_destroy (MuContacts *self); - - -/** - * clear all contacts from the cache - * - * @param self a MuContacts instance - */ -void mu_contacts_clear (MuContacts *self); - - -/** - * get the path for the contacts cache file - * - * @param contacts a contacts object - * - * @return 
the path as a constant string (don't free), or NULL in case - * of error - */ -const gchar* mu_contacts_get_path (MuContacts *self); - /** * return the number of contacts @@ -94,18 +38,18 @@ const gchar* mu_contacts_get_path (MuContacts *self); */ size_t mu_contacts_count (MuContacts *self); - /** - * call called for mu_contacts_foreach; returns the e-mail address, - * name (which may be NULL) , whether the message is 'personal', the - * timestamp for the address (when it was last seen), and the - * frequency (in how many message did this contact participate) + * Function called for mu_contacts_foreach; receives the full address, the + * e-mail address, name (which may be NULL), whether the message is 'personal', + * the timestamp for the address (when it was last seen), the frequency (in how + * many messages did this contact participate) and the tstamp (last modification) * */ -typedef void (*MuContactsForeachFunc) (const char *email, const char *name, - gboolean personal, - time_t tstamp, unsigned freq, - gpointer user_data); +typedef void (*MuContactsForeachFunc) (const char *full_address, + const char *email, const char *name, + gboolean personal, + time_t last_seen, unsigned freq, + gint64 tstamp, gpointer user_data); /** * call a function for either each contact, or each contact satisfying @@ -114,25 +58,13 @@ typedef void (*MuContactsForeachFunc) (const char *email, const char *name, * @param self contacts object * @param func callback function to be called for each * @param user_data user data to pass to the callback - * @param pattern a regular expression which matches either the e-mail - * or name, to filter out contacts, or NULL to not do any filtering.
- * @param num receives the number of contacts found, or NULL * - * @return TRUE if the function succeeded, or FALSE if the provide - * regular expression was invalid (and not NULL) + * @return TRUE if the function succeeded, or FALSE if self or func is + * NULL */ -gboolean mu_contacts_foreach (MuContacts *self, MuContactsForeachFunc func, - gpointer user_data, const char* pattern, - size_t *num); - -/** - * serialize the contacts to the contacts cache file - * - * @param self contacts object - * - * @return TRUE if the function succeeded, FALSE otherwise - * */ -gboolean mu_contacts_serialize (MuContacts *self); +gboolean mu_contacts_foreach (MuContacts *self, + MuContactsForeachFunc func, + gpointer user_data); G_END_DECLS diff --git a/lib/mu-contacts.hh b/lib/mu-contacts.hh new file mode 100644 index 00000000..19cc6614 --- /dev/null +++ b/lib/mu-contacts.hh @@ -0,0 +1,155 @@ +/* +** Copyright (C) 2019 Dirk-Jan C. Binnema +** +** This program is free software; you can redistribute it and/or modify it +** under the terms of the GNU General Public License as published by the +** Free Software Foundation; either version 3, or (at your option) any +** later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software Foundation, +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** +*/ + +#ifndef __MU_CONTACTS_HH__ +#define __MU_CONTACTS_HH__ + +#include +#include +#include +#include +#include + + +#include "mu-contacts.h" + +namespace Mu { + +/// Data-structure representing information about some contact.
+ +struct ContactInfo { + /** + * Construct a new ContactInfo + * + * @param _full_address the full email address + name. + * @param _email email address + * @param _name name or empty + * @param _personal is this a personal contact? + * @param _last_seen when was this contact last seen? + * @param _freq how often was this contact seen? + * + * @return + */ + ContactInfo (const std::string& _full_address, + const std::string& _email, + const std::string& _name, + bool _personal, time_t _last_seen, size_t _freq=1); + + std::string full_address; /**< Full address (email address + name) */ + std::string email; /**< email address */ + std::string name; /**< name (or empty) */ + bool personal; /**< is this a personal contact? */ + time_t last_seen; /**< when was this contact last seen? */ + std::size_t freq; /**< how often was this contact seen? */ + + int64_t tstamp; /**< Time-stamp, as per g_get_monotonic_time */ +}; + +/// All contacts +class Contacts { +public: + /** + * Construct a new contacts object + * + * @param serialized serialized contacts + */ + Contacts (const std::string& serialized); + + /** + * DTOR + * + */ + ~Contacts (); + + /** + * Add a contact + * + * @param ci A contact-info object + */ + void add(ContactInfo&& ci); + + /** + * Clear all contacts + * + */ + void clear(); + + /** + * Get the number of contacts + * + + * @return number of contacts + */ + std::size_t size() const; + + /** + * Are there no contacts? + * + * @return true or false + */ + bool empty() const { return size() == 0; } + + /** + * Get the contacts, serialized. + * + * @return serialized contacts + */ + std::string serialize() const; + + /** + * Find a contact based on the email address. This is not safe, since + * the returned ptr can be invalidated at any time; only for unit-tests.
+ * + * @param email email address + * + * @return contact info, or {} if not found + */ + const ContactInfo* _find (const std::string& email) const; + + /** + * Prototype for a callable that receives a contact + * + * @param contact some contact + */ + using EachContactFunc = std::function; + + /** + * Invoke some callable for each contact, in order of rank. + * + * @param each_contact + */ + void for_each (const EachContactFunc& each_contact) const; + + /** + * For C compatibility + * + * @return a MuContacts* referring to this. + */ + MuContacts* mu_contacts() { return reinterpret_cast(this); } + + + +private: + struct Private; + std::unique_ptr priv_; +}; + +}; // namespace Mu + +#endif /* __MU_CONTACTS_HH__ */ diff --git a/lib/mu-store-priv.hh b/lib/mu-store-priv.hh index c919fd5c..182e8a09 100644 --- a/lib/mu-store-priv.hh +++ b/lib/mu-store-priv.hh @@ -21,18 +21,18 @@ #ifndef __MU_STORE_PRIV_HH__ #define __MU_STORE_PRIV_HH__ -#if HAVE_CONFIG_H #include "config.h" -#endif /*HAVE_CONFIG_H*/ #include -#include +#include #include #include #include +#include + #include "mu-store.h" -#include "mu-contacts.h" +#include "mu-contacts.hh" #include "mu-str.h" class MuStoreError { @@ -46,35 +46,23 @@ private: const std::string _what; }; +#define MU_CONTACTS_CACHE "contacts-cache" + struct _MuStore { public: /* create a read-write MuStore */ - _MuStore (const char *patharg, const char *contacts_path, - bool rebuild) { - - init (patharg, contacts_path, rebuild, false); + _MuStore (const char *patharg, bool rebuild) { if (rebuild) - _db = new Xapian::WritableDatabase + _db = std::make_unique (patharg, Xapian::DB_CREATE_OR_OVERWRITE); else - _db = new Xapian::WritableDatabase + _db = std::make_unique (patharg, Xapian::DB_CREATE_OR_OPEN); + init (patharg, rebuild, false); check_set_version (); - if (contacts_path) { - /* when rebuilding, attempt to clear the - * contacts path */ - if (rebuild && access (contacts_path, W_OK) == 0) - (void)unlink (contacts_path); - - _contacts
= mu_contacts_new (contacts_path); - if (!_contacts) - throw MuStoreError (MU_ERROR_FILE, - ("failed to init contacts cache")); - } - MU_WRITE_LOG ("%s: opened %s (batch size: %u) for read-write", __func__, this->path(), (unsigned)batch_size()); } @@ -82,9 +70,9 @@ public: /* create a read-only MuStore */ _MuStore (const char *patharg) { - init (patharg, NULL, false, false); - _db = new Xapian::Database (patharg); + _db = std::make_unique(patharg); + init (patharg, false, false); if (!mu_store_versions_match(this)) { char *errstr = g_strdup_printf ("db version: %s, but we need %s; " @@ -99,18 +87,19 @@ public: MU_WRITE_LOG ("%s: opened %s read-only", __func__, this->path()); } - void init (const char *patharg, const char *contacts_path, - bool rebuild, bool read_only) { + void init (const char *patharg, bool rebuild, bool read_only) { _my_addresses = NULL; _batch_size = DEFAULT_BATCH_SIZE; - _contacts = 0; _in_transaction = false; _path = patharg; _processed = 0; _read_only = read_only; _ref_count = 1; _version = NULL; + + _contacts = std::make_unique( + _db->get_metadata(MU_CONTACTS_CACHE)); } void set_my_addresses (const char **addrs) { @@ -146,18 +135,14 @@ public: if (_ref_count != 0) g_warning ("ref count != 0"); - mu_contacts_destroy (_contacts); - _contacts = NULL; - - if (!_read_only) - mu_store_flush (this); + if (!_read_only && db_writable()) + mu_store_flush (this); g_free (_version); mu_str_free_list (_my_addresses); MU_WRITE_LOG ("closing xapian database with %d document(s)", (int)db_read_only()->get_doccount()); - delete _db; } MU_XAPIAN_CATCH_BLOCK; } @@ -169,19 +154,14 @@ public: // clear the database db_writable()->close (); - delete _db; - _db = new Xapian::WritableDatabase + _db = std::make_unique (path(), Xapian::DB_CREATE_OR_OVERWRITE); - - // clear the contacts cache - if (_contacts) - mu_contacts_clear (_contacts); } // not re-entrant; stays valid until called again const char *get_uid_term (const char *path) const; - MuContacts* contacts() { 
return _contacts; } + Mu::Contacts* contacts() { return _contacts.get(); } const char *version () const { if (!_version) @@ -204,10 +184,11 @@ public: Xapian::WritableDatabase* db_writable() { if (G_UNLIKELY(is_read_only())) throw std::runtime_error ("database is read-only"); - return (Xapian::WritableDatabase*)_db; + return dynamic_cast(_db.get()); } - Xapian::Database* db_read_only() const { return _db; } + Xapian::Database* db_read_only() const { + return dynamic_cast(_db.get()); } const char* path () const { return _path.c_str(); } bool is_read_only () const { return _read_only; } @@ -235,20 +216,20 @@ public: GSList *my_addresses () { return _my_addresses; } /* by default, use transactions of 30000 messages */ - static const unsigned DEFAULT_BATCH_SIZE = 30000; + static const unsigned DEFAULT_BATCH_SIZE = 30000; private: /* transaction handling */ - bool _in_transaction; - int _processed; - size_t _batch_size; /* batch size of a xapian transaction */ + bool _in_transaction; + int _processed; + size_t _batch_size; /* batch size of a xapian transaction */ /* contacts object to cache all the contact information */ - MuContacts *_contacts; - std::string _path; mutable char *_version; - Xapian::Database *_db; + std::unique_ptr _db; + std::unique_ptr _contacts; + bool _read_only; guint _ref_count; diff --git a/lib/mu-store-read.cc b/lib/mu-store-read.cc index 191c4a2b..a663cd61 100644 --- a/lib/mu-store-read.cc +++ b/lib/mu-store-read.cc @@ -87,6 +87,19 @@ mu_store_is_read_only (const MuStore *store) } +MuContacts* +mu_store_contacts (MuStore *store) +{ + g_return_val_if_fail (store, FALSE); + + try { + return store->contacts()->mu_contacts(); + + } MU_XAPIAN_CATCH_BLOCK_RETURN(FALSE); +} + + + unsigned mu_store_count (const MuStore *store, GError **err) { diff --git a/lib/mu-store-write.cc b/lib/mu-store-write.cc index ad3681c3..78b013f5 100644 --- a/lib/mu-store-write.cc +++ b/lib/mu-store-write.cc @@ -40,7 +40,7 @@ #include "mu-str.h" #include "mu-date.h" 
#include "mu-flags.h" -#include "mu-contacts.h" +#include "mu-contacts.hh" void _MuStore::begin_transaction () @@ -133,8 +133,7 @@ mu_store_new_writable (const char* xpath, const char *contacts_cache, try { try { MuStore *store; - store = new _MuStore (xpath, contacts_cache, - rebuild ? true : false); + store = new _MuStore (xpath, rebuild ? true : false); add_synonyms (store); return store; @@ -190,20 +189,19 @@ mu_store_clear (MuStore *store, GError **err) void -mu_store_flush (MuStore *store) -{ - g_return_if_fail (store); +mu_store_flush (MuStore *store) try { - try { - if (store->in_transaction()) - store->commit_transaction (); - store->db_writable()->commit (); + g_return_if_fail (store); - } MU_XAPIAN_CATCH_BLOCK; + if (store->in_transaction()) + store->commit_transaction (); + store->db_writable()->commit (); - if (store->contacts()) - mu_contacts_serialize (store->contacts()); -} + if (store->contacts()) + store->db_writable()->set_metadata(MU_CONTACTS_CACHE, + store->contacts()->serialize()); + +} MU_XAPIAN_CATCH_BLOCK; static void add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) @@ -601,11 +599,12 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) add_term(*msgdoc->_doc, pfx + flat); add_address_subfields (*msgdoc->_doc, contact->address, pfx); /* store it also in our contacts cache */ - if (msgdoc->_store->contacts()) - mu_contacts_add (msgdoc->_store->contacts(), - contact->address, contact->name, - msgdoc->_personal, - mu_msg_get_date(msgdoc->_msg)); + auto contacts = msgdoc->_store->contacts(); + if (contacts) + contacts->add(contact->address, + contact->name ? 
contact->name : "", + msgdoc->_personal, + mu_msg_get_date(msgdoc->_msg)); } return TRUE; diff --git a/lib/mu-store.h b/lib/mu-store.h index 7df551de..4f83998f 100644 --- a/lib/mu-store.h +++ b/lib/mu-store.h @@ -24,6 +24,7 @@ #include #include #include /* for MuError, MuError */ +#include G_BEGIN_DECLS @@ -146,6 +147,16 @@ void mu_store_set_batch_size (MuStore *store, guint batchsize); void mu_store_set_my_addresses (MuStore *store, const char **my_addresses); +/** + * Get the MuContacts* ptr for this store. + * + * @param store a store + * + * @return the contacts ptr + */ +MuContacts* mu_store_contacts (MuStore *store); + + /** * get the numbers of documents in the database *