1
0
mirror of https://github.com/djcb/mu.git synced 2024-06-30 08:01:07 +02:00
mu/lib/mu-contacts-cache.cc
Dirk-Jan C. Binnema 81689f0af3 contacts-cache: return most relevant contacts
Return the contacts in *reverse* rank order, i.e. the most relevant come first.
This is useful since we only want the first maxnum contacts, and those should of
course be the most relevant.

Update mu cfind/server as well.

cfind
2022-05-09 22:25:28 +03:00

487 lines
12 KiB
C++

/*
** Copyright (C) 2019-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
** Free Software Foundation; either version 3, or (at your option) any
** later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software Foundation,
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**
*/
#include "mu-contacts-cache.hh"
#include <mutex>
#include <unordered_map>
#include <set>
#include <sstream>
#include <functional>
#include <algorithm>
#include <regex>
#include <ctime>
#include <utils/mu-utils.hh>
#include <glib.h>
using namespace Mu;
/**
 * Hash functor for the email-address keys of the contacts map.
 *
 * The work is delegated to lowercase_hash().
 */
struct EmailHash {
	std::size_t operator()(const std::string& email) const
	{
		const auto digest = lowercase_hash(email);
		return digest;
	}
};
/**
 * Equality functor for the email-address keys of the contacts map.
 *
 * Two addresses are equal when they have the same length and are identical
 * byte-for-byte after folding ASCII 'A'-'Z' to lower case. The strings
 * themselves are compared rather than their hashes, so two different
 * addresses whose hashes happen to collide are never mistaken for the same
 * contact.
 *
 * NOTE(review): an unordered container needs equal keys to hash equally; this
 * presumes lowercase_hash() is insensitive to ASCII case -- confirm.
 */
struct EmailEqual {
	bool operator()(const std::string& email1, const std::string& email2) const {
		// ASCII-only, locale-independent case folding.
		const auto fold = [](char c) -> char {
			return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
		};
		// check the sizes first, so the three-iterator std::equal is safe.
		return email1.size() == email2.size() &&
		       std::equal(email1.begin(), email1.end(), email2.begin(),
				  [&fold](char c1, char c2) { return fold(c1) == fold(c2); });
	}
};
// Contacts keyed by email address; keys are hashed with EmailHash and
// compared with EmailEqual.
using ContactUMap = std::unordered_map<const std::string, Contact, EmailHash, EmailEqual>;
/**
 * Private state of a ContactsCache: the contacts map, the mutex that the
 * ContactsCache methods lock, the personal-address matchers and the dirty
 * counter.
 */
struct ContactsCache::Private {
	/**
	 * Build the state from serialized contacts and personal addresses.
	 *
	 * @param serialized serialized contacts, parsed with deserialize()
	 * @param personal personal addresses; an entry of two or more characters
	 * that both starts and ends with '/' is a regular expression, any other
	 * entry is a plain address
	 */
	Private(const std::string& serialized, const StringVec& personal)
	    : contacts_{deserialize(serialized)},
	      personal_plain_{make_personal_plain(personal)},
	      personal_rx_{make_personal_rx(personal)},
	      dirty_{0}
	{}

	ContactUMap deserialize(const std::string&) const;
	std::string serialize() const;

	ContactUMap                   contacts_;
	std::mutex                    mtx_;
	const StringVec               personal_plain_;
	const std::vector<std::regex> personal_rx_;
	size_t                        dirty_;

private:
	/**
	 * Is this personal-address entry written as a regular expression?
	 *
	 * @param entry an entry of the personal-address list
	 *
	 * @return true if the entry is at least two characters long and is
	 * enclosed in slashes, false otherwise
	 */
	static bool is_rx_entry(const std::string& entry) {
		return entry.size() >= 2 && entry.front() == '/' && entry.back() == '/';
	}

	/**
	 * Collect the plain (non-regex) personal addresses.
	 *
	 * @param personal all personal-address entries
	 *
	 * @return the entries that are not regular expressions, in their
	 * original order
	 */
	StringVec make_personal_plain(const StringVec& personal) const {
		StringVec plain;
		for (const auto& entry : personal) {
			if (!is_rx_entry(entry))
				plain.push_back(entry);
		}
		return plain;
	}

	/**
	 * Compile the regex personal addresses.
	 *
	 * @param personal all personal-address entries
	 *
	 * @return a compiled regex for each slash-enclosed entry; entries that
	 * fail to compile are skipped with a warning
	 */
	std::vector<std::regex> make_personal_rx(const StringVec& personal) const {
		const auto rx_flags = std::regex::basic | std::regex::optimize |
				      std::regex::icase;

		std::vector<std::regex> compiled;
		for (const auto& entry : personal) {
			if (!is_rx_entry(entry))
				continue;
			try {
				// the pattern is the entry without the enclosing slashes.
				const auto pattern = entry.substr(1, entry.length() - 2);
				std::regex rx(pattern, rx_flags);
				compiled.emplace_back(std::move(rx));
			} catch (const std::regex_error& rex) {
				g_warning("invalid personal address regexp '%s': %s",
					  entry.c_str(),
					  rex.what());
			}
		}
		return compiled;
	}
};
// Delimiter between the fields of one serialized contact line.
constexpr auto Separator = "\xff"; // Invalid in UTF-8
/**
 * Parse serialized contacts into a contacts map.
 *
 * The input is read line by line; each line is split on Separator and must
 * yield exactly six fields. Field 0 is not used here; fields 1-5 are the
 * email address, the name, the personal flag (a leading '1' means personal),
 * the message date and the frequency. Lines with a different number of
 * fields are skipped with a warning.
 *
 * @param serialized the serialized contacts
 *
 * @return the contacts, keyed by email address
 */
ContactUMap
ContactsCache::Private::deserialize(const std::string& serialized) const
{
	ContactUMap       contacts;
	std::stringstream ss{serialized, std::ios_base::in};
	std::string       line;

	while (getline(ss, line)) {
		// deliberately not const: the fields are moved from below, and
		// moving from a const element would silently copy instead.
		auto parts = Mu::split(line, Separator);
		if (G_UNLIKELY(parts.size() != 6)) {
			g_warning("error: '%s'", line.c_str());
			continue;
		}

		const auto message_date =
		    static_cast<time_t>(g_ascii_strtoll(parts[4].c_str(), nullptr, 10));
		const auto frequency =
		    static_cast<std::size_t>(g_ascii_strtoll(parts[5].c_str(), nullptr, 10));
		const bool personal = parts[3][0] == '1';

		// the email is only read here; it is moved into the map key below.
		Contact ci(parts[1],                 // email
			   std::move(parts[2]),      // name
			   message_date,             // message_date
			   personal,                 // personal
			   frequency,                // frequency
			   g_get_monotonic_time()); // tstamp

		contacts.emplace(std::move(parts[1]), std::move(ci));
	}

	return contacts;
}
/**
 * Construct a contacts cache.
 *
 * @param serialized serialized contacts to start with
 * @param personal personal addresses; an entry enclosed in slashes is a
 * regular expression, any other entry is a plain address
 */
ContactsCache::ContactsCache(const std::string& serialized, const StringVec& personal)
    : priv_(std::make_unique<Private>(serialized, personal))
{}
// Defaulted in this translation unit, after Private has been defined.
ContactsCache::~ContactsCache() = default;
std::string
ContactsCache::serialize() const
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
std::string s;
for (auto& item : priv_->contacts_) {
const auto& ci{item.second};
s += Mu::format("%s%s"
"%s%s"
"%s%s"
"%d%s"
"%" G_GINT64_FORMAT "%s"
"%" G_GINT64_FORMAT "\n",
ci.display_name().c_str(),
Separator,
ci.email.c_str(),
Separator,
ci.name.c_str(),
Separator,
ci.personal ? 1 : 0,
Separator,
(gint64)ci.message_date,
Separator,
(gint64)ci.frequency);
}
priv_->dirty_ = 0;
return s;
}
/**
 * Has the cache changed since it was last serialized?
 *
 * The dirty counter is incremented by add() and clear() and reset by
 * serialize(). It is read here without taking the mutex.
 *
 * @return true if the dirty counter is non-zero, false otherwise
 */
bool
ContactsCache::dirty() const
{
	return priv_->dirty_ != 0;
}
//const Contact
/**
 * Add a contact to the cache, or update the existing entry for its address.
 *
 * For an unknown address the contact is stored as given, except that a
 * contact not already marked personal gets its flag from is_personal(), and
 * its timestamp is set to the current monotonic time.
 *
 * For a known address the frequency of the stored contact is incremented;
 * if the new contact has a later message date, the stored email spelling,
 * the name (only when the new name is not empty), the timestamp and the
 * message date are updated too.
 *
 * Either way the cache is marked dirty.
 *
 * @param contact the contact to add; it is consumed
 */
void
ContactsCache::add(Contact&& contact)
{
	std::lock_guard<std::mutex> l_{priv_->mtx_};

	++priv_->dirty_;

	auto it = priv_->contacts_.find(contact.email);

	if (it == priv_->contacts_.end()) { // completely new contact
		if (!contact.personal)
			contact.personal = is_personal(contact.email);
		contact.tstamp = g_get_monotonic_time();

		// copy the key before the contact itself is moved into the map.
		auto email{contact.email};
		priv_->contacts_.emplace(std::move(email), std::move(contact));

	} else { // existing contact.
		auto& existing{it->second};
		++existing.frequency;

		if (contact.message_date > existing.message_date) { // update?
			existing.email = std::move(contact.email);
			// update name only if new one is not empty.
			if (!contact.name.empty())
				existing.name = std::move(contact.name);
			existing.tstamp       = g_get_monotonic_time();
			existing.message_date = contact.message_date;
		}
	}
}
/**
 * Add a sequence of contacts that belong together.
 *
 * If any of the contacts has a personal address (see is_personal()), all of
 * them are marked personal before being added; otherwise none is.
 *
 * @param contacts the contacts to add; they are consumed
 * @param personal out-parameter: set to true if any of the contacts has a
 * personal address, false otherwise
 */
void
ContactsCache::add(Contacts&& contacts, bool& personal)
{
	const auto first_personal = seq_find_if(contacts, [this](auto&& candidate) {
		return is_personal(candidate.email);
	});
	personal = (first_personal != contacts.cend());

	for (auto&& each : contacts) {
		each.personal = personal;
		add(std::move(each));
	}
}
/**
 * Look up the contact stored for an email address.
 *
 * The mutex is only held during the lookup itself, not while the caller
 * uses the returned pointer.
 *
 * @param email the address to look for
 *
 * @return pointer to the stored contact, or a null pointer if there is none
 */
const Contact*
ContactsCache::_find(const std::string& email) const
{
	const std::lock_guard<std::mutex> guard{priv_->mtx_};

	const auto pos = priv_->contacts_.find(email);

	return pos != priv_->contacts_.end() ? &pos->second : nullptr;
}
/**
 * Remove all contacts and mark the cache dirty.
 */
void
ContactsCache::clear()
{
	const std::lock_guard<std::mutex> guard{priv_->mtx_};

	priv_->contacts_.clear();
	priv_->dirty_ += 1;
}
/**
 * Get the number of contacts in the cache.
 *
 * @return the number of contacts
 */
std::size_t
ContactsCache::size() const
{
	const std::lock_guard<std::mutex> guard{priv_->mtx_};

	const std::size_t count = priv_->contacts_.size();
	return count;
}
/**
* This is used for sorting the Contacts in order of relevance. A highly
* specific algorithm, but the details don't matter _too_ much.
*
* This is currently used for the ordering in mu-cfind and auto-completion in
* mu4e, if the various completion methods don't override it...
*/
// Length of the "recent" window, in seconds (15 days); ContactLessThan
// subtracts it from the current time.
constexpr auto RecentOffset{15 * 24 * 3600};
/**
 * Ordering of contacts by relevance: "less" means less relevant.
 *
 * The criteria, in order of precedence: non-personal before personal; when
 * at least one of the two message dates is recent and the dates differ,
 * older before newer; less frequent before more frequent; and finally by
 * email address.
 */
struct ContactLessThan {
	/// Fix the start of the "recent" window, relative to the current time.
	ContactLessThan()
	    : recently_(::time(nullptr) - RecentOffset) {}

	/**
	 * Compare two contacts by relevance.
	 *
	 * @param ci1 a contact
	 * @param ci2 another contact
	 *
	 * @return true if ci1 is less relevant than ci2, false otherwise
	 */
	bool operator()(const Mu::Contact& ci1, const Mu::Contact& ci2) const
	{
		// non-personal is less relevant.
		const bool same_personal = ci1.personal == ci2.personal;
		if (!same_personal)
			return ci1.personal < ci2.personal;

		// the date only counts if at least one of the two is recent.
		const bool either_recent =
		    ci1.message_date > recently_ || ci2.message_date > recently_;
		const bool same_date = ci1.message_date == ci2.message_date;
		if (either_recent && !same_date)
			return ci1.message_date < ci2.message_date;

		// less frequent is less relevant; the email address breaks ties.
		return ci1.frequency != ci2.frequency
			   ? ci1.frequency < ci2.frequency
			   : ci1.email < ci2.email;
	}

	// message dates after this moment count as recent; it is computed once,
	// when the comparator is constructed (approx. 15 days before that).
	const time_t recently_;
};
// References to contacts, ordered with ContactLessThan (less relevant first).
using ContactSet = std::set<std::reference_wrapper<const Contact>,
ContactLessThan>;
void
ContactsCache::for_each(const EachContactFunc& each_contact) const
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
if (!each_contact)
return; // nothing to do
// first sort them for 'rank'
ContactSet sorted;
for (const auto& item : priv_->contacts_)
sorted.emplace(item.second);
// return in _reverse_ order, so we get the most relevant ones first.
for (auto it = sorted.rbegin(); it != sorted.rend(); ++it) {
if (!each_contact(*it))
break;
}
}
/**
 * Is this a personal address?
 *
 * An address is personal if it equals one of the plain personal addresses
 * (compared with g_ascii_strcasecmp) or if it matches one of the personal
 * regular expressions in full.
 *
 * @param addr the address to check
 *
 * @return true if the address is personal, false otherwise
 */
bool
ContactsCache::is_personal(const std::string& addr) const
{
	const auto& plain = priv_->personal_plain_;
	const bool  plain_hit =
	    std::any_of(plain.begin(), plain.end(), [&addr](const auto& candidate) {
		    return g_ascii_strcasecmp(addr.c_str(), candidate.c_str()) == 0;
	    });
	if (plain_hit)
		return true;

	const auto& patterns = priv_->personal_rx_;
	return std::any_of(patterns.begin(), patterns.end(), [&addr](const auto& rx) {
		return std::regex_match(addr, rx);
	});
}
#ifdef BUILD_TESTS
/*
* Tests.
*
*/
#include "test-mu-common.hh"
/*
 * Basic test: adding contacts, looking them up regardless of case, and
 * clearing the cache.
 */
static void
test_mu_contacts_cache_base()
{
// start with nothing serialized: the cache is empty.
Mu::ContactsCache contacts("");
g_assert_true(contacts.empty());
g_assert_cmpuint(contacts.size(), ==, 0);
contacts.add(Mu::Contact("foo.bar@example.com",
"Foo", {}, 12345));
g_assert_false(contacts.empty());
g_assert_cmpuint(contacts.size(), ==, 1);
contacts.add(Mu::Contact("cuux@example.com", "Cuux", {},
54321));
g_assert_cmpuint(contacts.size(), ==, 2);
// an address that is already known does not add an entry...
contacts.add(
Mu::Contact("foo.bar@example.com", "Foo", {}, 77777));
g_assert_cmpuint(contacts.size(), ==, 2);
// ...and neither does the same address in a different case.
contacts.add(
Mu::Contact("Foo.Bar@Example.Com", "Foo", {}, 88888));
g_assert_cmpuint(contacts.size(), ==, 2);
// note: replaces first.
{
// an unknown address is not found.
const auto info = contacts._find("bla@example.com");
g_assert_false(info);
}
{
// the lookup ignores case; the stored email is the spelling from
// the last add().
const auto info = contacts._find("foo.BAR@example.com");
g_assert_true(info);
g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com");
}
contacts.clear();
g_assert_true(contacts.empty());
g_assert_cmpuint(contacts.size(), ==, 0);
}
/*
 * Test is_personal() with two plain personal addresses and one
 * slash-enclosed regular expression.
 */
static void
test_mu_contacts_cache_personal()
{
Mu::StringVec personal = {"foo@example.com", "bar@cuux.org", "/bar-.*@fnorb.f./"};
Mu::ContactsCache contacts{"", personal};
// plain addresses match regardless of ASCII case; the regex must match
// the whole address.
g_assert_true(contacts.is_personal("foo@example.com"));
g_assert_true(contacts.is_personal("Bar@CuuX.orG"));
g_assert_true(contacts.is_personal("bar-123abc@fnorb.fi"));
g_assert_true(contacts.is_personal("bar-zzz@fnorb.fr"));
// unknown addresses, a non-ASCII variant of a plain address and
// near-misses of the regex are not personal.
g_assert_false(contacts.is_personal("foo@bar.com"));
g_assert_false(contacts.is_personal("BÂr@CuuX.orG"));
g_assert_false(contacts.is_personal("bar@fnorb.fi"));
g_assert_false(contacts.is_personal("bar-zzz@fnorb.xr"));
}
/*
 * Test the order in which for_each() visits the contacts: each case adds
 * two contacts named "a" and "b" and checks which one is visited first.
 */
static void
test_mu_contacts_cache_sort()
{
// concatenate the names of the contacts, in the order for_each() visits them.
auto result_chars = [](const Mu::ContactsCache& ccache)->std::string {
std::string str;
if (g_test_verbose())
g_print("contacts-cache:\n");
ccache.for_each([&](auto&& contact) {
if (g_test_verbose())
g_print("\t- %s\n", contact.display_name().c_str());
str += contact.name;
// keep going: visit all contacts.
return true;
});
return str;
};
const auto now{std::time({})};
// "first" means more relevant
{ /* recent messages, newer comes first */
Mu::ContactsCache ccache("");
ccache.add(Mu::Contact{"a@example.com", "a", now, true, 1000, 0});
ccache.add(Mu::Contact{"b@example.com", "b", now-1, true, 1000, 0});
assert_equal(result_chars(ccache), "ab");
}
{ /* non-recent messages, more frequent comes first */
Mu::ContactsCache ccache("");
ccache.add(Mu::Contact{"a@example.com", "a", now-2*RecentOffset, true, 1000, 0});
ccache.add(Mu::Contact{"b@example.com", "b", now-3*RecentOffset, true, 2000, 0});
assert_equal(result_chars(ccache), "ba");
}
{ /* personal comes first */
Mu::ContactsCache ccache("");
ccache.add(Mu::Contact{"a@example.com", "a", now-5*RecentOffset, true, 1000, 0});
ccache.add(Mu::Contact{"b@example.com", "b", now, false, 8000, 0});
assert_equal(result_chars(ccache), "ab");
}
{ /* if all else fails, reverse-alphabetically */
Mu::ContactsCache ccache("");
ccache.add(Mu::Contact{"a@example.com", "a", now, false, 1000, 0});
ccache.add(Mu::Contact{"b@example.com", "b", now, false, 1000, 0});
g_assert_cmpuint(ccache.size(),==,2);
assert_equal(result_chars(ccache), "ba");
}
}
int
main(int argc, char* argv[])
{
g_test_init(&argc, &argv, NULL);
g_test_add_func("/lib/contacts-cache/base", test_mu_contacts_cache_base);
g_test_add_func("/lib/contacts-cache/personal", test_mu_contacts_cache_personal);
g_test_add_func("/lib/contacts-cache/sort", test_mu_contacts_cache_sort);
g_log_set_handler(
NULL,
(GLogLevelFlags)(G_LOG_LEVEL_MASK | G_LOG_FLAG_FATAL | G_LOG_FLAG_RECURSION),
(GLogFunc)black_hole,
NULL);
return g_test_run();
}
#endif /*BUILD_TESTS*/