mirror of
https://github.com/djcb/mu.git
synced 2024-06-30 08:01:07 +02:00
Return the contacts in *reverse* rank order, i.e. the most relevant come first. This is useful since we only want the first maxnum contacts, and those should of course be the most relevant. Update mu cfind/server as well. cfind
487 lines
12 KiB
C++
487 lines
12 KiB
C++
/*
|
|
** Copyright (C) 2019-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
|
**
|
|
** This program is free software; you can redistribute it and/or modify it
|
|
** under the terms of the GNU General Public License as published by the
|
|
** Free Software Foundation; either version 3, or (at your option) any
|
|
** later version.
|
|
**
|
|
** This program is distributed in the hope that it will be useful,
|
|
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
** GNU General Public License for more details.
|
|
**
|
|
** You should have received a copy of the GNU General Public License
|
|
** along with this program; if not, write to the Free Software Foundation,
|
|
** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
**
|
|
*/
|
|
|
|
#include "mu-contacts-cache.hh"
|
|
|
|
#include <mutex>
|
|
#include <unordered_map>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <functional>
|
|
#include <algorithm>
|
|
#include <regex>
|
|
#include <ctime>
|
|
|
|
#include <utils/mu-utils.hh>
|
|
#include <glib.h>
|
|
|
|
using namespace Mu;
|
|
|
|
struct EmailHash {
|
|
std::size_t operator()(const std::string& email) const {
|
|
return lowercase_hash(email);
|
|
}
|
|
};
|
|
/**
 * Equality functor for e-mail addresses used as map keys.
 *
 * Two addresses are equal when they have the same length and the same
 * characters, ignoring ASCII case. The characters themselves are compared
 * (not the hashes of the two strings), so two different addresses whose
 * hash values happen to collide are never mistaken for one another.
 *
 * NOTE(review): this assumes the accompanying hash functor gives the same
 * value for addresses that differ only in ASCII case -- confirm against
 * lowercase_hash().
 */
struct EmailEqual {
	bool operator()(const std::string& email1, const std::string& email2) const
	{
		// fold 'A'..'Z' to 'a'..'z'; all other bytes are left as they are.
		const auto fold = [](unsigned char c) -> unsigned char {
			return (c >= 'A' && c <= 'Z')
				? static_cast<unsigned char>(c - 'A' + 'a')
				: c;
		};

		// different lengths can never match; this also makes the
		// three-iterator std::equal below safe.
		if (email1.size() != email2.size())
			return false;

		return std::equal(email1.begin(), email1.end(), email2.begin(),
				  [&](char c1, char c2) {
					  return fold(static_cast<unsigned char>(c1)) ==
						 fold(static_cast<unsigned char>(c2));
				  });
	}
};
|
|
|
|
// Unordered map from e-mail address to Contact; EmailHash and EmailEqual are
// the hashing and key-equality functors for the address keys.
using ContactUMap = std::unordered_map<const std::string, Contact, EmailHash, EmailEqual>;
|
|
/**
 * Private implementation data of ContactsCache: the contacts map, the mutex
 * guarding it, the personal addresses (plain and regexp) and the dirty
 * counter.
 */
struct ContactsCache::Private {
	/**
	 * Construct the private data.
	 *
	 * @param serialized serialized contacts, parsed with deserialize()
	 * @param personal personal addresses; items of the form /.../ are
	 * compiled as regular expressions, all others are kept as plain
	 * addresses
	 */
	Private(const std::string& serialized, const StringVec& personal)
	    : contacts_{deserialize(serialized)},
	      personal_plain_{make_personal_plain(personal)},
	      personal_rx_{make_personal_rx(personal)},
	      dirty_{0}
	{}

	ContactUMap deserialize(const std::string&) const;
	std::string serialize() const;

	ContactUMap contacts_;
	std::mutex  mtx_;

	const StringVec               personal_plain_;
	const std::vector<std::regex> personal_rx_;

	size_t dirty_;

private:
	/**
	 * Is this personal-address item a regexp, i.e. at least two
	 * characters long and both starting and ending with '/'?
	 *
	 * @param item a personal-address item
	 *
	 * @return true if the item is a regexp item, false otherwise
	 */
	static bool is_rx_item(const std::string& item)
	{
		if (item.size() < 2)
			return false;

		return item.front() == '/' && item.back() == '/';
	}

	/**
	 * Return the non-regex addresses
	 *
	 * @param personal all personal-address items
	 *
	 * @return the items that are not of the form /.../, in their
	 * original order
	 */
	StringVec make_personal_plain(const StringVec& personal) const
	{
		StringVec plain;

		for (const auto& item : personal) {
			if (!is_rx_item(item))
				plain.push_back(item);
		}

		return plain;
	}

	/**
	 * Return regexps for the regex-addresses
	 *
	 * @param personal all personal-address items
	 *
	 * @return the compiled regexps for the items of the form /.../;
	 * items that fail to compile are logged with a warning and left out
	 */
	std::vector<std::regex> make_personal_rx(const StringVec& personal) const
	{
		std::vector<std::regex> compiled;

		for (const auto& item : personal) {
			if (!is_rx_item(item))
				continue; // a plain address

			try {
				// the pattern is what is between the slashes.
				const auto pattern{item.substr(1, item.length() - 2)};
				std::regex rx(pattern,
					      std::regex::basic | std::regex::optimize |
					      std::regex::icase);
				compiled.emplace_back(std::move(rx));
			} catch (const std::regex_error& rex) {
				g_warning("invalid personal address regexp '%s': %s",
					  item.c_str(),
					  rex.what());
			}
		}

		return compiled;
	}
};
|
|
|
|
// Field separator within one line of serialized contact data; used both when
// writing (serialize) and when splitting the line again (deserialize).
constexpr auto Separator = "\xff"; // Invalid in UTF-8
|
|
|
|
|
|
/**
 * Parse serialized contacts into a contacts map.
 *
 * Each line of @p serialized describes one contact as six fields, separated
 * by Separator. Fields 1..5 are the e-mail address, the name, the personal
 * flag ('1' for personal), the message date and the frequency; field 0 is
 * not used here. Lines that do not split into exactly six fields are logged
 * with a warning and skipped.
 *
 * @param serialized serialized contacts, one per line
 *
 * @return the contacts, keyed by their e-mail address
 */
ContactUMap
ContactsCache::Private::deserialize(const std::string& serialized) const
{
	ContactUMap       contacts;
	std::stringstream ss{serialized, std::ios_base::in};
	std::string       line;

	while (getline(ss, line)) {
		// deliberately not const: the name and the address are moved
		// out of it below, which would silently copy for a const vector.
		auto parts = Mu::split(line, Separator);
		if (G_UNLIKELY(parts.size() != 6)) {
			g_warning("error: '%s'", line.c_str());
			continue;
		}

		const auto message_date =
			static_cast<time_t>(g_ascii_strtoll(parts[4].c_str(), nullptr, 10));
		const auto frequency =
			static_cast<std::size_t>(g_ascii_strtoll(parts[5].c_str(), nullptr, 10));
		const bool personal = parts[3][0] == '1';

		Contact ci(parts[1],                // email; still needed as the key
			   std::move(parts[2]),     // name
			   message_date,            // message_date
			   personal,                // personal
			   frequency,               // frequency
			   g_get_monotonic_time()); // tstamp

		// the address is not needed after this, so hand it over to the map.
		contacts.emplace(std::move(parts[1]), std::move(ci));
	}

	return contacts;
}
|
|
|
|
/**
 * Construct a ContactsCache.
 *
 * @param serialized serialized contacts to start out with
 * @param personal the personal addresses
 */
ContactsCache::ContactsCache(const std::string& serialized, const StringVec& personal)
    : priv_(std::make_unique<Private>(serialized, personal))
{}
|
|
|
|
// Defaulted out-of-line, in the file where ContactsCache::Private is defined.
ContactsCache::~ContactsCache() = default;
|
|
std::string
|
|
ContactsCache::serialize() const
|
|
{
|
|
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
|
std::string s;
|
|
|
|
for (auto& item : priv_->contacts_) {
|
|
const auto& ci{item.second};
|
|
s += Mu::format("%s%s"
|
|
"%s%s"
|
|
"%s%s"
|
|
"%d%s"
|
|
"%" G_GINT64_FORMAT "%s"
|
|
"%" G_GINT64_FORMAT "\n",
|
|
ci.display_name().c_str(),
|
|
Separator,
|
|
ci.email.c_str(),
|
|
Separator,
|
|
ci.name.c_str(),
|
|
Separator,
|
|
ci.personal ? 1 : 0,
|
|
Separator,
|
|
(gint64)ci.message_date,
|
|
Separator,
|
|
(gint64)ci.frequency);
|
|
}
|
|
|
|
priv_->dirty_ = 0;
|
|
|
|
return s;
|
|
}
|
|
|
|
bool
|
|
ContactsCache::dirty() const
|
|
{
|
|
return priv_->dirty_;
|
|
}
|
|
|
|
//const Contact
|
|
void
|
|
ContactsCache::add(Contact&& contact)
|
|
{
|
|
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
|
|
|
++priv_->dirty_;
|
|
|
|
auto it = priv_->contacts_.find(contact.email);
|
|
|
|
if (it == priv_->contacts_.end()) { // completely new contact
|
|
|
|
contact.name = contact.name;
|
|
if (!contact.personal)
|
|
contact.personal = is_personal(contact.email);
|
|
contact.tstamp = g_get_monotonic_time();
|
|
|
|
auto email{contact.email};
|
|
// return priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(contact)))
|
|
// .first->second;
|
|
|
|
priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(contact)));
|
|
|
|
} else { // existing contact.
|
|
auto& existing{it->second};
|
|
++existing.frequency;
|
|
if (contact.message_date > existing.message_date) { // update?
|
|
existing.email = std::move(contact.email);
|
|
// update name only if new one is not empty.
|
|
if (!contact.name.empty())
|
|
existing.name = std::move(contact.name);
|
|
existing.tstamp = g_get_monotonic_time();
|
|
existing.message_date = contact.message_date;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
ContactsCache::add(Contacts&& contacts, bool& personal)
|
|
{
|
|
personal = seq_find_if(contacts,[&](auto&& c){
|
|
return is_personal(c.email); }) != contacts.cend();
|
|
|
|
for (auto&& contact: contacts) {
|
|
contact.personal = personal;
|
|
add(std::move(contact));
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * Look up the contact for an e-mail address.
 *
 * NOTE(review): the lock is released when this function returns, while the
 * returned pointer refers to the entry inside the map -- presumably callers
 * must not use it while another thread may modify the cache; confirm.
 *
 * @param email the e-mail address to look for
 *
 * @return pointer to the contact in the cache, or nullptr if there is none
 */
const Contact*
ContactsCache::_find(const std::string& email) const
{
	std::lock_guard<std::mutex> lock{priv_->mtx_};

	const auto& contacts = priv_->contacts_;
	const auto  found    = contacts.find(email);

	return found != contacts.end() ? &found->second : nullptr;
}
|
|
|
|
void
|
|
ContactsCache::clear()
|
|
{
|
|
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
|
|
|
++priv_->dirty_;
|
|
|
|
priv_->contacts_.clear();
|
|
}
|
|
|
|
std::size_t
|
|
ContactsCache::size() const
|
|
{
|
|
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
|
|
|
return priv_->contacts_.size();
|
|
}
|
|
|
|
|
|
/**
|
|
* This is used for sorting the Contacts in order of relevance. A highly
|
|
* specific algorithm, but the details don't matter _too_ much.
|
|
*
|
|
* This is currently used for the ordering in mu-cfind and auto-completion in
|
|
* mu4e, if the various completion methods don't override it...
|
|
*/
|
|
constexpr auto RecentOffset{15 * 24 * 3600};
|
|
struct ContactLessThan {
|
|
ContactLessThan()
|
|
: recently_{::time({}) - RecentOffset} {}
|
|
|
|
|
|
bool operator()(const Mu::Contact& ci1, const Mu::Contact& ci2) const
|
|
{
|
|
// non-personal is less relevant.
|
|
if (ci1.personal != ci2.personal)
|
|
return ci1.personal < ci2.personal;
|
|
|
|
// older is less relevant for recent messages
|
|
if (std::max(ci1.message_date, ci2.message_date) > recently_ &&
|
|
ci1.message_date != ci2.message_date)
|
|
return ci1.message_date < ci2.message_date;
|
|
|
|
// less frequent is less relevant
|
|
if (ci1.frequency != ci2.frequency)
|
|
return ci1.frequency < ci2.frequency;
|
|
|
|
// if all else fails, alphabetically
|
|
return ci1.email < ci2.email;
|
|
}
|
|
// only sort recently seen contacts by recency; approx 15 days.
|
|
// this changes during the lifetime, but that's all fine.
|
|
const time_t recently_;
|
|
};
|
|
|
|
using ContactSet = std::set<std::reference_wrapper<const Contact>,
|
|
ContactLessThan>;
|
|
|
|
void
|
|
ContactsCache::for_each(const EachContactFunc& each_contact) const
|
|
{
|
|
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
|
|
|
if (!each_contact)
|
|
return; // nothing to do
|
|
|
|
// first sort them for 'rank'
|
|
ContactSet sorted;
|
|
for (const auto& item : priv_->contacts_)
|
|
sorted.emplace(item.second);
|
|
|
|
// return in _reverse_ order, so we get the most relevant ones first.
|
|
for (auto it = sorted.rbegin(); it != sorted.rend(); ++it) {
|
|
if (!each_contact(*it))
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool
|
|
ContactsCache::is_personal(const std::string& addr) const
|
|
{
|
|
for (auto&& p : priv_->personal_plain_)
|
|
if (g_ascii_strcasecmp(addr.c_str(), p.c_str()) == 0)
|
|
return true;
|
|
|
|
for (auto&& rx : priv_->personal_rx_) {
|
|
std::smatch m; // perhaps cache addr in personal_plain_?
|
|
if (std::regex_match(addr, m, rx))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
#ifdef BUILD_TESTS
|
|
/*
|
|
* Tests.
|
|
*
|
|
*/
|
|
|
|
#include "test-mu-common.hh"
|
|
|
|
static void
|
|
test_mu_contacts_cache_base()
|
|
{
|
|
Mu::ContactsCache contacts("");
|
|
|
|
g_assert_true(contacts.empty());
|
|
g_assert_cmpuint(contacts.size(), ==, 0);
|
|
|
|
contacts.add(Mu::Contact("foo.bar@example.com",
|
|
"Foo", {}, 12345));
|
|
g_assert_false(contacts.empty());
|
|
g_assert_cmpuint(contacts.size(), ==, 1);
|
|
|
|
contacts.add(Mu::Contact("cuux@example.com", "Cuux", {},
|
|
54321));
|
|
|
|
g_assert_cmpuint(contacts.size(), ==, 2);
|
|
|
|
contacts.add(
|
|
Mu::Contact("foo.bar@example.com", "Foo", {}, 77777));
|
|
g_assert_cmpuint(contacts.size(), ==, 2);
|
|
|
|
contacts.add(
|
|
Mu::Contact("Foo.Bar@Example.Com", "Foo", {}, 88888));
|
|
g_assert_cmpuint(contacts.size(), ==, 2);
|
|
// note: replaces first.
|
|
|
|
{
|
|
const auto info = contacts._find("bla@example.com");
|
|
g_assert_false(info);
|
|
}
|
|
|
|
{
|
|
const auto info = contacts._find("foo.BAR@example.com");
|
|
g_assert_true(info);
|
|
|
|
g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com");
|
|
}
|
|
|
|
contacts.clear();
|
|
g_assert_true(contacts.empty());
|
|
g_assert_cmpuint(contacts.size(), ==, 0);
|
|
}
|
|
|
|
static void
|
|
test_mu_contacts_cache_personal()
|
|
{
|
|
Mu::StringVec personal = {"foo@example.com", "bar@cuux.org", "/bar-.*@fnorb.f./"};
|
|
Mu::ContactsCache contacts{"", personal};
|
|
|
|
g_assert_true(contacts.is_personal("foo@example.com"));
|
|
g_assert_true(contacts.is_personal("Bar@CuuX.orG"));
|
|
g_assert_true(contacts.is_personal("bar-123abc@fnorb.fi"));
|
|
g_assert_true(contacts.is_personal("bar-zzz@fnorb.fr"));
|
|
|
|
g_assert_false(contacts.is_personal("foo@bar.com"));
|
|
g_assert_false(contacts.is_personal("BÂr@CuuX.orG"));
|
|
g_assert_false(contacts.is_personal("bar@fnorb.fi"));
|
|
g_assert_false(contacts.is_personal("bar-zzz@fnorb.xr"));
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
test_mu_contacts_cache_sort()
|
|
{
|
|
auto result_chars = [](const Mu::ContactsCache& ccache)->std::string {
|
|
std::string str;
|
|
if (g_test_verbose())
|
|
g_print("contacts-cache:\n");
|
|
ccache.for_each([&](auto&& contact) {
|
|
if (g_test_verbose())
|
|
g_print("\t- %s\n", contact.display_name().c_str());
|
|
str += contact.name;
|
|
return true;
|
|
});
|
|
return str;
|
|
};
|
|
|
|
|
|
const auto now{std::time({})};
|
|
|
|
// "first" means more relevant
|
|
|
|
{ /* recent messages, newer comes first */
|
|
|
|
Mu::ContactsCache ccache("");
|
|
ccache.add(Mu::Contact{"a@example.com", "a", now, true, 1000, 0});
|
|
ccache.add(Mu::Contact{"b@example.com", "b", now-1, true, 1000, 0});
|
|
assert_equal(result_chars(ccache), "ab");
|
|
}
|
|
|
|
{ /* non-recent messages, more frequent comes first */
|
|
|
|
Mu::ContactsCache ccache("");
|
|
ccache.add(Mu::Contact{"a@example.com", "a", now-2*RecentOffset, true, 1000, 0});
|
|
ccache.add(Mu::Contact{"b@example.com", "b", now-3*RecentOffset, true, 2000, 0});
|
|
assert_equal(result_chars(ccache), "ba");
|
|
}
|
|
|
|
{ /* personal comes first */
|
|
|
|
Mu::ContactsCache ccache("");
|
|
ccache.add(Mu::Contact{"a@example.com", "a", now-5*RecentOffset, true, 1000, 0});
|
|
ccache.add(Mu::Contact{"b@example.com", "b", now, false, 8000, 0});
|
|
assert_equal(result_chars(ccache), "ab");
|
|
}
|
|
|
|
{ /* if all else fails, reverse-alphabetically */
|
|
Mu::ContactsCache ccache("");
|
|
ccache.add(Mu::Contact{"a@example.com", "a", now, false, 1000, 0});
|
|
ccache.add(Mu::Contact{"b@example.com", "b", now, false, 1000, 0});
|
|
g_assert_cmpuint(ccache.size(),==,2);
|
|
assert_equal(result_chars(ccache), "ba");
|
|
}
|
|
}
|
|
|
|
|
|
int
|
|
main(int argc, char* argv[])
|
|
{
|
|
g_test_init(&argc, &argv, NULL);
|
|
|
|
g_test_add_func("/lib/contacts-cache/base", test_mu_contacts_cache_base);
|
|
g_test_add_func("/lib/contacts-cache/personal", test_mu_contacts_cache_personal);
|
|
g_test_add_func("/lib/contacts-cache/sort", test_mu_contacts_cache_sort);
|
|
|
|
g_log_set_handler(
|
|
NULL,
|
|
(GLogLevelFlags)(G_LOG_LEVEL_MASK | G_LOG_FLAG_FATAL | G_LOG_FLAG_RECURSION),
|
|
(GLogFunc)black_hole,
|
|
NULL);
|
|
|
|
return g_test_run();
|
|
}
|
|
#endif /*BUILD_TESTS*/
|