contacts: rework: install in database

Rewrite the contacts-cache backend in c++

Store the contacts as metadata in the xapian database, rather than in a
separate file.

Update the Store to deal with this.
This commit is contained in:
djcb 2019-04-28 13:57:07 +03:00
parent 9a225365c1
commit 8193cc3e4c
9 changed files with 525 additions and 673 deletions

View File

@ -63,7 +63,8 @@ noinst_LTLIBRARIES= \
libmu_la_SOURCES= \
mu-bookmarks.c \
mu-bookmarks.h \
mu-contacts.c \
mu-contacts.cc \
mu-contacts.hh \
mu-contacts.h \
mu-container.c \
mu-container.h \

View File

@ -1,522 +0,0 @@
/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
/*
** Copyright (C) 2008-2016 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 3, or (at your option) any
** later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include "mu-contacts.h"
#include "mu-util.h"
#include "mu-str.h"
#define EMAIL_KEY "email"
#define NAME_KEY "name"
#define TSTAMP_KEY "tstamp"
#define PERSONAL_KEY "personal"
#define FREQ_KEY "frequency"
/* note: 'personal' here means a mail where one of my e-mail addresses is
* explicitly in one of the address fields, ie., it's not some mailing list
* message */
/* In-memory record for a single contact; stored as a value in the _hash table
* of MuContacts. */
struct _ContactInfo {
/* display name (may be NULL) and e-mail address; both are owned by this
* struct and released in contact_info_destroy */
gchar *_name, *_email;
/* TRUE once the contact was added with the 'personal' flag set */
gboolean _personal;
/* highest timestamp passed to mu_contacts_add for this contact so far */
time_t _tstamp;
/* starts at 1 for a new contact; incremented each time mu_contacts_add
* sees the contact again */
unsigned _freq;
};
typedef struct _ContactInfo ContactInfo;
/* forward declarations; the definitions are near the end of this file */
static void contact_info_destroy (ContactInfo *cinfo);
static ContactInfo *contact_info_new (char *email, char *name,
gboolean personal, time_t tstamp, unsigned freq);
/* The contacts cache: a key-file on disk mirrored by an in-memory hash
* table. */
struct _MuContacts {
/* key-file loaded from _path; rewritten by mu_contacts_serialize */
GKeyFile *_ccache;
/* copy of the path passed to mu_contacts_new */
gchar *_path;
/* maps a key-file group name (the encoded e-mail address) to its
* ContactInfo; the table frees both keys and values */
GHashTable *_hash;
/* set by mu_contacts_add; mu_contacts_destroy only serializes when it
* is set */
gboolean _dirty;
};
static GKeyFile*
load_key_file (const char *path)
{
GError *err;
GKeyFile *keyfile;
gboolean file_exists;
err = NULL;
/* of course this is racy, but it's only for giving more
* meaningful errors to users */
file_exists = TRUE;
if (access(path, F_OK) != 0) {
if (errno != ENOENT) {
g_warning ("cannot open %s: %s", path, strerror(errno));
return NULL;
}
file_exists = FALSE;
}
err = NULL;
keyfile = g_key_file_new ();
if (file_exists && !g_key_file_load_from_file
(keyfile, path, G_KEY_FILE_KEEP_COMMENTS, &err)) {
g_warning ("could not load keyfile %s: %s", path, err->message);
g_error_free (err);
g_key_file_free (keyfile);
return NULL;
}
return keyfile;
}
/*
 * Read the values for one contact from key-file group @group.
 *
 * The e-mail address and the timestamp are mandatory; the name, the personal
 * flag and the frequency are optional (a missing or negative frequency
 * becomes 1).
 *
 * On success returns TRUE; *email and *name are newly allocated and owned by
 * the caller (*name may be NULL). On failure a warning is logged, FALSE is
 * returned, nothing is left allocated and *email / *name are NULL.
 */
static gboolean
get_values (GKeyFile *kfile, const gchar *group,
	    gchar **email, gchar **name, gboolean *personal, size_t *tstamp,
	    unsigned *freq)
{
	GError *err = NULL;
	int count;

	*name  = NULL;
	*email = g_key_file_get_value (kfile, group, EMAIL_KEY, &err);
	if (!*email)
		goto fail;

	*tstamp = (time_t)g_key_file_get_integer (kfile, group,
						  TSTAMP_KEY, &err);
	if (err)
		goto fail;

	*personal = g_key_file_get_boolean (kfile, group, PERSONAL_KEY, NULL);
	*name     = g_key_file_get_value (kfile, group, NAME_KEY, NULL);

	count = g_key_file_get_integer (kfile, group, FREQ_KEY, NULL);
	*freq = (count >= 0) ? (unsigned)count : 1;

	return TRUE;

fail:
	g_warning ("error getting value for %s: %s",
		   group, (err && err->message) ? err->message : "error");
	g_clear_error (&err);

	/* the e-mail address may already have been read when the timestamp
	 * turned out to be bad; don't leak it */
	g_free (*email);
	*email = NULL;

	return FALSE;
}
/*
 * Fill the hash table of @self from its key-file: one ContactInfo per group
 * that get_values can read; unreadable groups are skipped.
 *
 * Always returns TRUE.
 */
static gboolean
deserialize_cache (MuContacts *self)
{
	gchar **groups;
	gsize i, len;

	groups = g_key_file_get_groups (self->_ccache, &len);

	for (i = 0; i != len; ++i) {
		ContactInfo *cinfo;
		char *name, *email;
		size_t tstamp;
		gboolean personal;
		unsigned freq;

		if (!get_values (self->_ccache, groups[i],
				 &email, &name, &personal, &tstamp, &freq)) {
			/* this group name does not end up in the hash table,
			 * so nobody else will free it */
			g_free (groups[i]);
			continue;
		}

		cinfo = contact_info_new (email, name, personal, tstamp, freq);

		/* the hash table takes ownership of groups[i], which is why
		 * the array is released with g_free below and not with
		 * g_strfreev */
		g_hash_table_insert (self->_hash, groups[i], cinfo);
	}

	g_free (groups);

	return TRUE;
}
/*
 * Put a "do not edit" comment at the top of @kfile.
 *
 * Returns TRUE on success; on failure logs a warning and returns FALSE.
 */
static gboolean
set_comment (GKeyFile *kfile)
{
	GError *error = NULL;

	if (g_key_file_set_comment (kfile, NULL, NULL,
				    " automatically generated -- do not edit",
				    &error))
		return TRUE;

	g_warning ("could not write comment to keyfile: %s",
		   error->message);
	g_error_free (error);

	return FALSE;
}
MuContacts*
mu_contacts_new (const gchar *path)
{
MuContacts *self;
g_return_val_if_fail (path, NULL);
self = g_new0 (MuContacts, 1);
self->_path = g_strdup (path);
self->_hash = g_hash_table_new_full
(g_str_hash, g_str_equal, g_free,
(GDestroyNotify)contact_info_destroy);
self->_ccache = load_key_file (path);
if (!self->_ccache || !set_comment (self->_ccache)) {
mu_contacts_destroy (self);
return NULL;
}
deserialize_cache (self);
MU_WRITE_LOG("deserialized contacts from cache %s",
path);
self->_dirty = FALSE;
return self;
}
/*
 * Forget all contacts: empty the hash table and replace the key-file with a
 * fresh, empty one. The object is marked as not dirty afterwards.
 */
void
mu_contacts_clear (MuContacts *self)
{
	GKeyFile *previous;

	g_return_if_fail (self);

	previous = self->_ccache;
	if (previous)
		g_key_file_free (previous);

	g_hash_table_remove_all (self->_hash);

	self->_ccache = g_key_file_new ();
	self->_dirty  = FALSE;
}
/*
 * we use the e-mail address to create a key in the GKeyFile, but we
 * have to mutilate a bit so that it's (a) *cough* practically-unique
 * and (b) valid as a GKeyFile group name (ie., valid utf8, no control
 * chars, no '[' or ']')
 *
 * ASCII letters are lower-cased and ASCII digits are kept; every other byte
 * is replaced by a letter in 'A'..'Y' derived from its value. The address is
 * truncated to 254 bytes.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call
 * (so this is not re-entrant), or NULL if @addr is NULL.
 */
static const char*
encode_email_address (const char *addr)
{
	static char enc[254 + 1]; /* max size for an e-mail addr */
	char *cur;

	if (!addr)
		return NULL;

	strncpy (enc, addr, sizeof(enc) - 1);
	enc[sizeof(enc) - 1] = '\0'; /* strncpy does not terminate on truncation */

	for (cur = enc; *cur != '\0'; ++cur) {
		/* work on the unsigned byte value: with a signed char, bytes
		 * >= 0x80 are negative, which breaks both the classification
		 * and the modulo below. The checks are spelled out in ASCII
		 * so the result does not depend on the locale. */
		const unsigned char c = (unsigned char)*cur;

		if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z'))
			continue; /* already fine */
		else if (c >= 'A' && c <= 'Z')
			*cur = (char)(c - 'A' + 'a');
		else
			*cur = (char)('A' + (c % ('Z' - 'A')));
	}

	return enc;
}
/*
 * Return a newly allocated copy of @addr in which the domain part (everything
 * after the first '@') is lower-cased -- but only when the domain is pure
 * ASCII, so IDNA addresses are left alone; in that case the copy is
 * unchanged.
 *
 * Returns NULL when @addr contains no '@'. Free the result with g_free.
 */
static char*
downcase_domain_maybe (const char *addr)
{
	const char *at_sign, *p;
	char *copy;
	size_t pos;

	at_sign = strchr (addr, '@');
	if (!at_sign) /*huh?*/
		return NULL;

	/* any non-ascii byte in the domain? then hands off */
	for (p = at_sign + 1; *p; ++p)
		if (!isascii(*p))
			return g_strdup (addr);

	copy = g_strdup (addr);
	for (pos = (size_t)(at_sign - addr) + 1; copy[pos]; ++pos)
		copy[pos] = g_ascii_tolower (copy[pos]);

	return copy;
}
/* Remove control characters from @str and strip leading/trailing whitespace,
 * both in place; a NULL @str is ignored. */
static void
clear_str (char* str)
{
	if (!str)
		return;

	mu_str_remove_ctrl_in_place (str);
	g_strstrip (str);
}
/*
 * Register a sighting of the contact with address @addr.
 *
 * An unknown contact is added with frequency 1. For a known contact the
 * frequency is incremented, the personal flag is switched on when @personal
 * is set, and -- if @tstamp is newer than what we had -- the timestamp and
 * (when @name is not empty) the name are updated.
 *
 * Returns TRUE on success, FALSE when @addr has no '@' (for a new contact) or
 * when an argument is invalid.
 */
gboolean
mu_contacts_add (MuContacts *self, const char *addr, const char *name,
		 gboolean personal, time_t tstamp)
{
	ContactInfo *known;
	const char *key;
	char *address;

	g_return_val_if_fail (self, FALSE);
	g_return_val_if_fail (addr, FALSE);

	key   = encode_email_address (addr);
	known = (ContactInfo*) g_hash_table_lookup (self->_hash, key);

	if (known) {
		/* if the contact is ever used in a personal way, it's
		 * personal */
		if (personal)
			known->_personal = TRUE;

		if (known->_tstamp < tstamp) {
			/* switch to the most recently used name, unless it
			 * is empty */
			if (!mu_str_is_empty(name)) {
				g_free (known->_name);
				known->_name = g_strdup (name);
				if (known->_name)
					mu_str_remove_ctrl_in_place (known->_name);
			}
			known->_tstamp = tstamp;
		}

		++known->_freq;
		self->_dirty = TRUE;
		return TRUE;
	}

	/* a contact we have not seen before */
	address = downcase_domain_maybe (addr);
	if (!address)
		return FALSE;

	/* contact_info_new takes ownership of both strings; key points into a
	 * static buffer, so the hash table gets its own copy */
	g_hash_table_insert (self->_hash, g_strdup(key),
			     contact_info_new (address,
					       name ? g_strdup(name) : NULL,
					       personal, tstamp, 1));
	self->_dirty = TRUE;

	return TRUE;
}
/* State handed to each_contact while mu_contacts_foreach walks the hash
* table. */
struct _EachContactData {
/* user callback, invoked for every contact that is not filtered out */
MuContactsForeachFunc _func;
/* opaque pointer passed through to _func */
gpointer _user_data;
/* compiled filter pattern, or NULL for no filtering */
GRegex *_rx;
/* number of contacts for which _func was called */
size_t _num;
};
typedef struct _EachContactData EachContactData;
/*
 * GHFunc for mu_contacts_foreach: call the user callback for contact @ci,
 * unless a filter pattern is set and it matches neither the e-mail address
 * nor the name (if there is a name).
 *
 * The e-mail address should never be NULL (the name may be); a contact
 * without one is reported and skipped, so neither the regex matcher nor the
 * callback ever sees a NULL address.
 */
static void
each_contact (const char *group, ContactInfo *ci, EachContactData *ecdata)
{
	if (!ci->_email) {
		g_warning ("missing email: %u", (unsigned)ci->_tstamp);
		return;
	}

	if (ecdata->_rx) {
		const gboolean matches =
			g_regex_match (ecdata->_rx, ci->_email, 0, NULL) ||
			(ci->_name &&
			 g_regex_match (ecdata->_rx, ci->_name, 0, NULL));
		if (!matches)
			return; /* filtered out */
	}

	ecdata->_func (ci->_email, ci->_name, ci->_personal,
		       ci->_tstamp, ci->_freq, ecdata->_user_data);
	++ecdata->_num;
}
/*
 * Call @func for every contact, or -- when @pattern is not NULL -- for every
 * contact whose e-mail address or name matches the (case-insensitive)
 * regular expression @pattern.
 *
 * If @num is not NULL it receives the number of contacts @func was called
 * for. Returns TRUE on success, FALSE if an argument is invalid or @pattern
 * does not compile.
 */
gboolean
mu_contacts_foreach (MuContacts *self, MuContactsForeachFunc func,
		     gpointer user_data, const char *pattern, size_t *num)
{
	EachContactData data;

	g_return_val_if_fail (self, FALSE);
	g_return_val_if_fail (func, FALSE);

	data._func      = func;
	data._user_data = user_data;
	data._num       = 0;
	data._rx        = NULL;

	if (pattern) {
		GError *error = NULL;

		data._rx = g_regex_new (pattern,
					G_REGEX_CASELESS|G_REGEX_OPTIMIZE,
					0, &error);
		if (!data._rx) {
			g_warning ("error in regexp '%s': %s",
				   pattern, error->message);
			g_error_free (error);
			return FALSE;
		}
	}

	g_hash_table_foreach (self->_hash, (GHFunc)each_contact, &data);

	if (data._rx)
		g_regex_unref (data._rx);
	if (num)
		*num = data._num;

	return TRUE;
}
/*
 * GHFunc for mu_contacts_serialize: copy the fields of contact @cinfo into
 * key-file group @group. The name is only written when there is one; the
 * timestamp and frequency are stored as plain ints.
 */
static void
each_keyval (const char *group, ContactInfo *cinfo, MuContacts *self)
{
	GKeyFile *kf = self->_ccache;
	const int tstamp = (int)cinfo->_tstamp;
	const int freq   = (int)cinfo->_freq;

	/* for the strings, use set_value so they do not necessarily have to
	 * be valid utf-8 */
	if (cinfo->_name)
		g_key_file_set_value (kf, group, NAME_KEY, cinfo->_name);
	g_key_file_set_value (kf, group, EMAIL_KEY, cinfo->_email);

	g_key_file_set_boolean (kf, group, PERSONAL_KEY, cinfo->_personal);
	g_key_file_set_integer (kf, group, TSTAMP_KEY, tstamp);
	g_key_file_set_integer (kf, group, FREQ_KEY, freq);
}
/*
 * Write all contacts to the cache file of @self.
 *
 * The contacts in the hash table are first copied into the key-file, which is
 * then written to the path given to mu_contacts_new. When the key-file
 * serializes to nothing at all, no file is written.
 *
 * Returns TRUE on success (including the nothing-to-write case); on a write
 * failure a warning is logged and FALSE is returned.
 */
gboolean
mu_contacts_serialize (MuContacts *self)
{
	gchar *data;
	gsize len = 0;
	gboolean rv = TRUE;

	g_return_val_if_fail (self, FALSE);

	g_hash_table_foreach (self->_hash, (GHFunc)each_keyval, self);

	/* Note: err arg is unused */
	data = g_key_file_to_data (self->_ccache, &len, NULL);
	if (len) {
		GError *err = NULL;

		rv = g_file_set_contents (self->_path, data, len, &err);
		if (!rv) {
			g_warning ("failed to serialize cache to %s: %s",
				   self->_path, err->message);
			g_error_free (err);
		}
	}

	/* g_key_file_to_data hands us an allocated string even when it is
	 * empty, so release it on every path */
	g_free (data);

	return rv;
}
/*
 * Free @self and everything it owns; when contacts were added since loading,
 * the cache is written to disk first. A NULL @self is ignored.
 */
void
mu_contacts_destroy (MuContacts *self)
{
	if (!self)
		return;

	if (self->_ccache) {
		/* only bother writing when something changed */
		if (self->_dirty && mu_contacts_serialize (self))
			MU_WRITE_LOG("serialized contacts cache %s",
				     self->_path);
		g_key_file_free (self->_ccache);
	}

	if (self->_hash)
		g_hash_table_destroy (self->_hash);

	g_free (self->_path);
	g_free (self);
}
/*
 * Create a ContactInfo from the given values.
 *
 * Note that the new object takes ownership of @email and @name; they are
 * cleaned up in place here and freed by contact_info_destroy. @email must
 * not be NULL (NULL is returned in that case); @name may be.
 */
static ContactInfo *
contact_info_new (char *email, char *name, gboolean personal, time_t tstamp,
		  unsigned freq)
{
	ContactInfo *info;

	g_return_val_if_fail (email, NULL);

	/* control characters in the strings could screw up the keyfile, so
	 * get rid of them */
	clear_str (email);
	clear_str (name);

	info            = g_slice_new (ContactInfo);
	info->_freq     = freq;
	info->_tstamp   = tstamp;
	info->_personal = personal;
	info->_name     = name;
	info->_email    = email;

	return info;
}
/* Free @cinfo together with the name and e-mail strings it owns; a NULL
 * @cinfo is ignored. */
static void
contact_info_destroy (ContactInfo *cinfo)
{
	if (cinfo) {
		g_free (cinfo->_name);
		g_free (cinfo->_email);
		g_slice_free (ContactInfo, cinfo);
	}
}
/* Return the number of contacts in @self, or 0 when @self is NULL. */
size_t
mu_contacts_count (MuContacts *self)
{
	size_t count;

	g_return_val_if_fail (self, 0);

	count = g_hash_table_size (self->_hash);
	return count;
}

282
lib/mu-contacts.cc Normal file
View File

@ -0,0 +1,282 @@
/*
** Copyright (C) 2019 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 3, or (at your option) any
** later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#include "mu-contacts.hh"
#include <mutex>
#include <string>
#include <unordered_map>
#include <set>
#include <sstream>
#include <functional>
#include <parser/utils.hh>
#include <glib.h>
using namespace Mu;
// Construct a ContactInfo from its parts; tstamp is not a parameter, it is
// taken from the monotonic clock at construction time.
ContactInfo::ContactInfo (const std::string& _full_address,
			  const std::string& _email,
			  const std::string& _name,
			  bool _personal, time_t _last_seen, size_t _freq):
	full_address(_full_address),
	email(_email),
	name(_name),
	personal(_personal),
	last_seen(_last_seen),
	freq(_freq),
	tstamp(g_get_monotonic_time())
{}
// Hash functor for e-mail addresses that ignores ASCII case, so it can be
// paired with EmailEqual.
struct EmailHash {
	std::size_t operator()(const std::string& email) const {
		// djb hash: h = h * 33 + c
		std::size_t hash = 5381;
		for (auto it = email.begin(); it != email.end(); ++it)
			hash = hash * 33 + g_ascii_tolower(*it);
		return hash;
	}
};
// Equality functor for e-mail addresses; compares ignoring ASCII case.
struct EmailEqual {
	bool operator()(const std::string& email1, const std::string& email2) const {
		const int diff = g_ascii_strcasecmp(email1.c_str(), email2.c_str());
		return diff == 0;
	}
};
struct ContactInfoHash {
std::size_t operator()(const ContactInfo& ci) const {
std::size_t djb = 5381; // djb hash
for (const auto c : ci.email)
djb = ((djb << 5) + djb) + g_ascii_tolower(c);
return djb;
}
};
// Equality functor for ContactInfo objects: two contacts are equal when their
// e-mail addresses are, ignoring ASCII case.
struct ContactInfoEqual {
	bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const {
		return EmailEqual{}(ci1.email, ci2.email);
	}
};
// Ordering functor that ranks contacts: personal contacts first, then the
// more recently seen, then the more frequently seen; remaining ties are
// broken by the e-mail address (ignoring ASCII case).
struct ContactInfoLessThan {
	bool operator()(const Mu::ContactInfo& ci1, const Mu::ContactInfo& ci2) const {
		if (ci1.personal == ci2.personal) {
			if (ci1.last_seen == ci2.last_seen) {
				if (ci1.freq == ci2.freq)
					return g_ascii_strcasecmp(ci1.email.c_str(),
								  ci2.email.c_str()) < 0;
				return ci1.freq > ci2.freq; // more frequent comes first
			}
			return ci1.last_seen > ci2.last_seen; // more recent comes first
		}
		return ci1.personal; // personal comes first
	}
};
// Map from e-mail address to contact; EmailHash / EmailEqual make the key
// lookup ignore ASCII case.
using ContactUMap = std::unordered_map<std::string, ContactInfo, EmailHash, EmailEqual>;
//using ContactUSet = std::unordered_set<ContactInfo, ContactInfoHash, ContactInfoEqual>;
// Set of references to contacts, ordered by rank (see ContactInfoLessThan).
using ContactSet = std::set<std::reference_wrapper<const ContactInfo>, ContactInfoLessThan>;
/// Private implementation data of Contacts.
struct Contacts::Private {
// Build the contacts map from its serialized form.
Private(const std::string& serialized):
contacts_{deserialize(serialized)}
{}
// Parse serialized contacts into a map.
ContactUMap deserialize(const std::string&) const;
// NOTE(review): declared, but no definition is visible in this file --
// Contacts::serialize does the work itself; confirm whether this is needed.
std::string serialize() const;
// all known contacts, keyed by e-mail address
ContactUMap contacts_;
// locked by the Contacts member functions while they access contacts_
std::mutex mtx_;
};
constexpr auto Separator = "\xff"; // Invalid in UTF-8

/**
 * Parse serialized contacts into a map.
 *
 * Each line of @serialized describes one contact as six Separator-delimited
 * fields: full address, e-mail address, name, personal flag ('1' means
 * personal), last-seen time and frequency. Lines that do not have exactly six
 * fields are reported with a warning and skipped.
 *
 * @param serialized serialized contacts
 *
 * @return the contacts, keyed by e-mail address
 */
ContactUMap
Contacts::Private::deserialize(const std::string& serialized) const
{
	ContactUMap contacts;
	std::stringstream ss{serialized, std::ios_base::in};
	std::string line;

	while (getline (ss, line)) {

		// not const, so the key below can really be moved out of it
		auto parts = Mux::split (line, Separator);
		if (G_UNLIKELY(parts.size() != 6)) {
			g_warning ("error: '%s'", line.c_str());
			continue;
		}

		// the constructor takes const references and copies; wrapping
		// the arguments in std::move would not change that
		ContactInfo ci(parts[0],           // full address
			       parts[1],           // email
			       parts[2],           // name
			       parts[3][0] == '1', // personal
			       (time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // last_seen
			       (std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq

		contacts.emplace(std::move(parts[1]), std::move(ci));
	}

	return contacts;
}
// Construct the contacts from their serialized form (see serialize()).
Contacts::Contacts (const std::string& serialized) :
	priv_(std::make_unique<Private>(serialized))
{}

// Defined here rather than in the header, since Private is only a complete
// type in this file.
Contacts::~Contacts() = default;
std::string
Contacts::serialize() const
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
std::string s;
for (const auto& item: priv_->contacts_) {
const auto& ci{item.second};
s += Mux::format("%s%s"
"%s%s"
"%s%s"
"%d%s"
"%" G_GINT64_FORMAT "%s"
"%" G_GINT64_FORMAT "\n",
ci.full_address.c_str(), Separator,
ci.email.c_str(), Separator,
ci.name.c_str(), Separator,
ci.personal ? 1 : 0, Separator,
(gint64)ci.last_seen, Separator,
(gint64)ci.freq);
}
return s;
}
/**
 * Add a contact, or merge it into the contact we already have for the same
 * e-mail address (compared ignoring ASCII case).
 *
 * When merging:
 *  - the frequency of the existing contact is incremented;
 *  - the contact becomes personal if the new sighting is personal: once a
 *    contact has been used in a personal way, it stays personal;
 *  - if the new sighting is more recent, last_seen and the e-mail address are
 *    taken from it, and so are the name and the full address unless the new
 *    name is empty; tstamp is refreshed to mark the modification.
 *
 * @param ci a contact-info object
 */
void
Contacts::add (ContactInfo&& ci)
{
	std::lock_guard<std::mutex> l_{priv_->mtx_};

	auto down = g_ascii_strdown (ci.email.c_str(), -1);
	std::string email{down};
	g_free(down);

	auto it = priv_->contacts_.find(email);
	if (it == priv_->contacts_.end()) { // completely new contact
		priv_->contacts_.emplace(std::move(email), std::move(ci));
		return;
	}

	auto& existing = it->second;
	++existing.freq;

	if (ci.personal)
		existing.personal = true;

	if (ci.last_seen > existing.last_seen) {
		existing.last_seen = ci.last_seen;
		existing.email     = std::move(ci.email);
		if (!ci.name.empty()) {
			// the full address embeds the name, so the two
			// have to change together
			existing.name         = std::move(ci.name);
			existing.full_address = std::move(ci.full_address);
		}
		existing.tstamp = g_get_monotonic_time();
	}
}
// Look up the contact with the given e-mail address (ignoring ASCII case).
// Returns a pointer into the contacts map, or a null pointer if there is no
// such contact.
const ContactInfo*
Contacts::_find (const std::string& email) const
{
	std::lock_guard<std::mutex> l_{priv_->mtx_};

	const auto& contacts = priv_->contacts_;
	const auto found     = contacts.find(email);

	return found != contacts.end() ? &found->second : nullptr;
}
// Remove all contacts.
void
Contacts::clear()
{
	std::lock_guard<std::mutex> l_{priv_->mtx_};

	auto& contacts = priv_->contacts_;
	contacts.clear();
}
// Return the number of contacts.
std::size_t
Contacts::size() const
{
	std::lock_guard<std::mutex> l_{priv_->mtx_};

	const std::size_t count = priv_->contacts_.size();
	return count;
}
void
Contacts::for_each(const EachContactFunc& each_contact) const
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
if (!each_contact)
return; // nothing to do
// first sort them for 'rank'
ContactSet sorted;
for (const auto& item: priv_->contacts_)
sorted.emplace(item.second);
for (const auto& ci: sorted)
each_contact (ci);
}
/// C binding

// Return the number of contacts in self, or 0 if self is NULL.
size_t
mu_contacts_count (MuContacts *self)
{
	g_return_val_if_fail (self, 0);

	const auto contacts = reinterpret_cast<Mu::Contacts*>(self);
	return contacts->size();
}
// Call func for every contact in self, in order of rank, passing the contact's
// fields as C values (the name is NULL when the contact has none). Contacts
// with an empty e-mail address are skipped.
//
// Returns TRUE, or FALSE when self or func is NULL.
gboolean
mu_contacts_foreach (MuContacts *self, MuContactsForeachFunc func,
		     gpointer user_data)
{
	g_return_val_if_fail (self, FALSE);
	g_return_val_if_fail (func, FALSE);

	const Mu::Contacts::EachContactFunc call_c_callback =
		[func, user_data](const ContactInfo& ci) {
			g_return_if_fail (!ci.email.empty());

			const char *name = ci.name.empty() ? NULL : ci.name.c_str();
			func(ci.full_address.c_str(), ci.email.c_str(), name,
			     ci.personal, ci.last_seen, ci.freq, ci.tstamp,
			     user_data);
		};

	reinterpret_cast<Mu::Contacts*>(self)->for_each(call_c_callback);

	return TRUE;
}
// Gives the opaque C type MuContacts a definition; the C bindings above cast a
// MuContacts* back to a Mu::Contacts* (and Contacts::mu_contacts() casts the
// other way), no _MuContacts object is created in this file.
struct _MuContacts : public Mu::Contacts {}; /**< c-compat */

View File

@ -1,5 +1,3 @@
/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
/*
** Copyright (C) 2012-2016 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
@ -30,60 +28,6 @@ G_BEGIN_DECLS
struct _MuContacts;
typedef struct _MuContacts MuContacts;
/**
* create a new MuContacts object; use mu_contacts_destroy when you no longer need it
*
* @param ccachefile full path to the file with cached list of contacts
*
* @return a new MuContacts* if succeeded, NULL otherwise
*/
MuContacts* mu_contacts_new (const gchar *ccachefile)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
/**
* add a contacts; if there's a contact with this e-mail address
* already, it will not updated unless the timestamp of this one is
* higher and has a non-empty name
*
* @param contacts a contacts object
* @param email e-mail address of the contact (not NULL)
* @param name name of the contact (or NULL)
* @param personal whether the contact is 'personal' (ie., my address
* appears in one of the address fields)
* @param tstamp timestamp for this address
*
* @return TRUE if succeeded, FALSE otherwise
*/
gboolean mu_contacts_add (MuContacts *self, const char *email,
const char* name, gboolean personal, time_t tstamp);
/**
* destroy the Contacts object
*
* @param contacts a contacts object
*/
void mu_contacts_destroy (MuContacts *self);
/**
* clear all contacts from the cache
*
* @param self a MuContacts instance
*/
void mu_contacts_clear (MuContacts *self);
/**
* get the path for the contacts cache file
*
* @param contacts a contacts object
*
* @return the path as a constant string (don't free), or NULL in case
* of error
*/
const gchar* mu_contacts_get_path (MuContacts *self);
/**
* return the number of contacts
@ -94,18 +38,18 @@ const gchar* mu_contacts_get_path (MuContacts *self);
*/
size_t mu_contacts_count (MuContacts *self);
/**
* call called for mu_contacts_foreach; returns the e-mail address,
* name (which may be NULL) , whether the message is 'personal', the
* timestamp for the address (when it was last seen), and the
* frequency (in how many message did this contact participate)
* Function called for mu_contacts_foreach; it receives the full address, the
* e-mail address, the name (which may be NULL), whether the contact is
* 'personal', when the address was last seen, the frequency (in how many
* messages this contact participated) and the tstamp (last modification)
*
*/
typedef void (*MuContactsForeachFunc) (const char *email, const char *name,
gboolean personal,
time_t tstamp, unsigned freq,
gpointer user_data);
typedef void (*MuContactsForeachFunc) (const char *full_address,
const char *email, const char *name,
gboolean personal,
time_t last_seen, unsigned freq,
gint64 tstamp, gpointer user_data);
/**
* call a function for either each contact, or each contact satisfying
@ -114,25 +58,13 @@ typedef void (*MuContactsForeachFunc) (const char *email, const char *name,
* @param self contacts object
* @param func callback function to be called for each
* @param user_data user data to pass to the callback
* @param pattern a regular expression which matches either the e-mail
* or name, to filter out contacts, or NULL to not do any filtering.
* @param num receives the number of contacts found, or NULL
*
* @return TRUE if the function succeeded, or FALSE if the provide
* regular expression was invalid (and not NULL)
* @return TRUE if the function succeeded, or FALSE if self or func
* is NULL
*/
gboolean mu_contacts_foreach (MuContacts *self, MuContactsForeachFunc func,
gpointer user_data, const char* pattern,
size_t *num);
/**
* serialize the contacts to the contacts cache file
*
* @param self contacts object
*
* @return TRUE if the function succeeded, FALSE otherwise
* */
gboolean mu_contacts_serialize (MuContacts *self);
gboolean mu_contacts_foreach (MuContacts *self,
MuContactsForeachFunc func,
gpointer user_data);
G_END_DECLS

155
lib/mu-contacts.hh Normal file
View File

@ -0,0 +1,155 @@
/*
** Copyright (C) 2019 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 3, or (at your option) any
** later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#ifndef __MU_CONTACTS_HH__
#define __MU_CONTACTS_HH__
#include <memory>
#include <functional>
#include <chrono>
#include <time.h>
#include <inttypes.h>
#include "mu-contacts.h"
namespace Mu {
/// Data-structure representing information about some contact.
struct ContactInfo {
/**
* Construct a new ContactInfo; its tstamp is set from
* g_get_monotonic_time at construction.
*
* @param _full_address the full email address + name.
* @param _email email address
* @param _name name or empty
* @param _personal is this a personal contact?
* @param _last_seen when was this contact last seen?
* @param _freq how often was this contact seen?
*/
ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
bool _personal, time_t _last_seen, size_t _freq=1);
std::string full_address; /**< Full name <email> */
std::string email; /**< email address */
std::string name; /**< name (or empty) */
bool personal; /**< is this a personal contact? */
time_t last_seen; /**< when was this contact last seen? */
std::size_t freq; /**< how often was this contact seen? */
int64_t tstamp; /**< Time-stamp, as per g_get_monotonic_time */
};
/// All contacts
class Contacts {
public:
/**
* Construct a new contacts object
*
* @param serialized serialized contacts, as produced by serialize()
*/
Contacts (const std::string& serialized);
/**
* DTOR
*
*/
~Contacts ();
/**
* Add a contact; if there already is a contact with the same e-mail
* address (ignoring ASCII case), that contact is updated instead.
*
* @param ci A contact-info object
*/
void add(ContactInfo&& ci);
/**
* Clear all contacts
*
*/
void clear();
/**
* Get the number of contacts
*
* @return number of contacts
*/
std::size_t size() const;
/**
* Are there no contacts?
*
* @return true or false
*/
bool empty() const { return size() == 0; }
/**
* Get the contacts, serialized; the result can be passed to the
* constructor to recreate them.
*
* @return serialized contacts
*/
std::string serialize() const;
/**
* Find a contact based on the email address. This is not safe, since
* the returned ptr can be invalidated at any time; only for unit-tests.
*
* @param email email address
*
* @return contact info, or {} if not found
*/
const ContactInfo* _find (const std::string& email) const;
/**
* Prototype for a callable that receives a contact
*
* @param contact_info some contact
*/
using EachContactFunc = std::function<void (const ContactInfo& contact_info)>;
/**
* Invoke some callable for each contact, in order of rank: personal
* contacts first, then the most recently seen, then the most
* frequently seen.
*
* @param each_contact the callable to invoke; if empty, nothing happens
*/
void for_each (const EachContactFunc& each_contact) const;
/**
* For C compatibility
*
* @return a MuContacts* referring to this.
*/
MuContacts* mu_contacts() { return reinterpret_cast<MuContacts*>(this); }
private:
struct Private;
std::unique_ptr<Private> priv_;
};
}; // namespace Mu
#endif /* __MU_CONTACTS_HH__ */

View File

@ -21,18 +21,18 @@
#ifndef __MU_STORE_PRIV_HH__
#define __MU_STORE_PRIV_HH__
#if HAVE_CONFIG_H
#include "config.h"
#endif /*HAVE_CONFIG_H*/
#include <cstdio>
#include <xapian.h>
#include <memory>
#include <cstring>
#include <stdexcept>
#include <unistd.h>
#include <xapian.h>
#include "mu-store.h"
#include "mu-contacts.h"
#include "mu-contacts.hh"
#include "mu-str.h"
class MuStoreError {
@ -46,35 +46,23 @@ private:
const std::string _what;
};
#define MU_CONTACTS_CACHE "contacts-cache"
struct _MuStore {
public:
/* create a read-write MuStore */
_MuStore (const char *patharg, const char *contacts_path,
bool rebuild) {
init (patharg, contacts_path, rebuild, false);
_MuStore (const char *patharg, bool rebuild) {
if (rebuild)
_db = new Xapian::WritableDatabase
_db = std::make_unique<Xapian::WritableDatabase>
(patharg, Xapian::DB_CREATE_OR_OVERWRITE);
else
_db = new Xapian::WritableDatabase
_db = std::make_unique<Xapian::WritableDatabase>
(patharg, Xapian::DB_CREATE_OR_OPEN);
init (patharg, rebuild, false);
check_set_version ();
if (contacts_path) {
/* when rebuilding, attempt to clear the
* contacts path */
if (rebuild && access (contacts_path, W_OK) == 0)
(void)unlink (contacts_path);
_contacts = mu_contacts_new (contacts_path);
if (!_contacts)
throw MuStoreError (MU_ERROR_FILE,
("failed to init contacts cache"));
}
MU_WRITE_LOG ("%s: opened %s (batch size: %u) for read-write",
__func__, this->path(), (unsigned)batch_size());
}
@ -82,9 +70,9 @@ public:
/* create a read-only MuStore */
_MuStore (const char *patharg) {
init (patharg, NULL, false, false);
_db = new Xapian::Database (patharg);
_db = std::make_unique<Xapian::Database>(patharg);
init (patharg, false, false);
if (!mu_store_versions_match(this)) {
char *errstr =
g_strdup_printf ("db version: %s, but we need %s; "
@ -99,18 +87,19 @@ public:
MU_WRITE_LOG ("%s: opened %s read-only", __func__, this->path());
}
void init (const char *patharg, const char *contacts_path,
bool rebuild, bool read_only) {
void init (const char *patharg, bool rebuild, bool read_only) {
_my_addresses = NULL;
_batch_size = DEFAULT_BATCH_SIZE;
_contacts = 0;
_in_transaction = false;
_path = patharg;
_processed = 0;
_read_only = read_only;
_ref_count = 1;
_version = NULL;
_contacts = std::make_unique<Mu::Contacts>(
_db->get_metadata(MU_CONTACTS_CACHE));
}
void set_my_addresses (const char **addrs) {
@ -146,18 +135,14 @@ public:
if (_ref_count != 0)
g_warning ("ref count != 0");
mu_contacts_destroy (_contacts);
_contacts = NULL;
if (!_read_only)
mu_store_flush (this);
if (!_read_only && db_writable())
mu_store_flush (this);
g_free (_version);
mu_str_free_list (_my_addresses);
MU_WRITE_LOG ("closing xapian database with %d document(s)",
(int)db_read_only()->get_doccount());
delete _db;
} MU_XAPIAN_CATCH_BLOCK;
}
@ -169,19 +154,14 @@ public:
// clear the database
db_writable()->close ();
delete _db;
_db = new Xapian::WritableDatabase
_db = std::make_unique<Xapian::WritableDatabase>
(path(), Xapian::DB_CREATE_OR_OVERWRITE);
// clear the contacts cache
if (_contacts)
mu_contacts_clear (_contacts);
}
// not re-entrant; stays valid until called again
const char *get_uid_term (const char *path) const;
MuContacts* contacts() { return _contacts; }
Mu::Contacts* contacts() { return _contacts.get(); }
const char *version () const {
if (!_version)
@ -204,10 +184,11 @@ public:
Xapian::WritableDatabase* db_writable() {
if (G_UNLIKELY(is_read_only()))
throw std::runtime_error ("database is read-only");
return (Xapian::WritableDatabase*)_db;
return dynamic_cast<Xapian::WritableDatabase*>(_db.get());
}
Xapian::Database* db_read_only() const { return _db; }
Xapian::Database* db_read_only() const {
return dynamic_cast<Xapian::Database*>(_db.get()); }
const char* path () const { return _path.c_str(); }
bool is_read_only () const { return _read_only; }
@ -235,20 +216,20 @@ public:
GSList *my_addresses () { return _my_addresses; }
/* by default, use transactions of 30000 messages */
static const unsigned DEFAULT_BATCH_SIZE = 30000;
static const unsigned DEFAULT_BATCH_SIZE = 30000;
private:
/* transaction handling */
bool _in_transaction;
int _processed;
size_t _batch_size; /* batch size of a xapian transaction */
bool _in_transaction;
int _processed;
size_t _batch_size; /* batch size of a xapian transaction */
/* contacts object to cache all the contact information */
MuContacts *_contacts;
std::string _path;
mutable char *_version;
Xapian::Database *_db;
std::unique_ptr<Xapian::Database> _db;
std::unique_ptr<Mu::Contacts> _contacts;
bool _read_only;
guint _ref_count;

View File

@ -87,6 +87,19 @@ mu_store_is_read_only (const MuStore *store)
}
/* Get the MuContacts* for this store; returns NULL if store is NULL, if the
 * store has no contacts object, or if an exception is caught. */
MuContacts*
mu_store_contacts (MuStore *store)
{
	g_return_val_if_fail (store, NULL);

	try {
		auto contacts = store->contacts();
		return contacts ? contacts->mu_contacts() : NULL;

	} MU_XAPIAN_CATCH_BLOCK_RETURN(NULL);
}
unsigned
mu_store_count (const MuStore *store, GError **err)
{

View File

@ -40,7 +40,7 @@
#include "mu-str.h"
#include "mu-date.h"
#include "mu-flags.h"
#include "mu-contacts.h"
#include "mu-contacts.hh"
void
_MuStore::begin_transaction ()
@ -133,8 +133,7 @@ mu_store_new_writable (const char* xpath, const char *contacts_cache,
try {
try {
MuStore *store;
store = new _MuStore (xpath, contacts_cache,
rebuild ? true : false);
store = new _MuStore (xpath, rebuild ? true : false);
add_synonyms (store);
return store;
@ -190,20 +189,19 @@ mu_store_clear (MuStore *store, GError **err)
void
mu_store_flush (MuStore *store)
{
g_return_if_fail (store);
mu_store_flush (MuStore *store) try {
try {
if (store->in_transaction())
store->commit_transaction ();
store->db_writable()->commit ();
g_return_if_fail (store);
} MU_XAPIAN_CATCH_BLOCK;
if (store->in_transaction())
store->commit_transaction ();
store->db_writable()->commit ();
if (store->contacts())
mu_contacts_serialize (store->contacts());
}
if (store->contacts())
store->db_writable()->set_metadata(MU_CONTACTS_CACHE,
store->contacts()->serialize());
} MU_XAPIAN_CATCH_BLOCK;
static void
add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
@ -601,11 +599,12 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
add_term(*msgdoc->_doc, pfx + flat);
add_address_subfields (*msgdoc->_doc, contact->address, pfx);
/* store it also in our contacts cache */
if (msgdoc->_store->contacts())
mu_contacts_add (msgdoc->_store->contacts(),
contact->address, contact->name,
msgdoc->_personal,
mu_msg_get_date(msgdoc->_msg));
auto contacts = msgdoc->_store->contacts();
if (contacts)
contacts->add(contact->address,
contact->name ? contact->name : "",
msgdoc->_personal,
mu_msg_get_date(msgdoc->_msg));
}
return TRUE;

View File

@ -24,6 +24,7 @@
#include <inttypes.h>
#include <mu-msg.h>
#include <mu-util.h> /* for MuError, MuError */
#include <mu-contacts.h>
G_BEGIN_DECLS
@ -146,6 +147,16 @@ void mu_store_set_batch_size (MuStore *store, guint batchsize);
void mu_store_set_my_addresses (MuStore *store, const char **my_addresses);
/**
* Get the MuContacts* ptr for this store.
*
* @param store a store
*
* @return the contacts ptr
*/
MuContacts* mu_store_contacts (MuStore *store);
/**
* get the numbers of documents in the database
*