mirror of https://github.com/djcb/mu.git
lib: support 'personal' regexp, move to mu-contacts
Move the determination of "personal" to MuContacts; add support for regexps (POSIX-basic, in //)
This commit is contained in:
parent
5cd6226ebd
commit
dbff5671dd
|
@ -25,6 +25,7 @@
|
|||
#include <sstream>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
#include <regex>
|
||||
|
||||
#include <utils/mu-utils.hh>
|
||||
#include <glib.h>
|
||||
|
@ -34,7 +35,21 @@ using namespace Mu;
|
|||
ContactInfo::ContactInfo (const std::string& _full_address,
|
||||
const std::string& _email,
|
||||
const std::string& _name,
|
||||
bool _personal, time_t _last_seen, size_t _freq):
|
||||
time_t _last_seen):
|
||||
full_address{_full_address},
|
||||
email{_email},
|
||||
name{_name},
|
||||
last_seen{_last_seen},
|
||||
freq{1},
|
||||
tstamp{g_get_monotonic_time()} {}
|
||||
|
||||
|
||||
ContactInfo::ContactInfo (const std::string& _full_address,
|
||||
const std::string& _email,
|
||||
const std::string& _name,
|
||||
bool _personal,
|
||||
time_t _last_seen,
|
||||
size_t _freq):
|
||||
full_address{_full_address},
|
||||
email{_email},
|
||||
name{_name},
|
||||
|
@ -43,7 +58,6 @@ ContactInfo::ContactInfo (const std::string& _full_address,
|
|||
freq{_freq},
|
||||
tstamp{g_get_monotonic_time()} {}
|
||||
|
||||
|
||||
struct EmailHash {
|
||||
std::size_t operator()(const std::string& email) const {
|
||||
std::size_t djb = 5381; // djb hash
|
||||
|
@ -95,19 +109,55 @@ using ContactUMap = std::unordered_map<const std::string, ContactInfo, EmailHash
|
|||
using ContactSet = std::set<std::reference_wrapper<const ContactInfo>, ContactInfoLessThan>;
|
||||
|
||||
struct Contacts::Private {
|
||||
Private(const std::string& serialized):
|
||||
contacts_{deserialize(serialized)}
|
||||
{}
|
||||
Private(const std::string& serialized,
|
||||
const StringVec& personal):
|
||||
contacts_{deserialize(serialized)} {
|
||||
make_personal(personal);
|
||||
}
|
||||
|
||||
void make_personal(const StringVec& personal);
|
||||
ContactUMap deserialize(const std::string&) const;
|
||||
std::string serialize() const;
|
||||
|
||||
ContactUMap contacts_;
|
||||
std::mutex mtx_;
|
||||
|
||||
StringVec personal_plain_;
|
||||
std::vector<std::regex> personal_rx_;
|
||||
};
|
||||
|
||||
constexpr auto Separator = "\xff"; // Invalid in UTF-8
|
||||
|
||||
void
|
||||
Contacts::Private::make_personal (const StringVec& personal)
|
||||
{
|
||||
for (auto&& p: personal) {
|
||||
|
||||
if (p.empty())
|
||||
continue; // invalid
|
||||
|
||||
if (p.size() < 2 || p.at(0) != '/' || p.at(p.length() - 1) != '/')
|
||||
personal_plain_.emplace_back(p); // normal address
|
||||
else {
|
||||
// a regex pattern.
|
||||
try {
|
||||
const auto rxstr{p.substr(1, p.length()-2)};
|
||||
personal_rx_.emplace_back(
|
||||
std::regex(rxstr,
|
||||
std::regex::basic |
|
||||
std::regex::optimize |
|
||||
std::regex::icase));
|
||||
|
||||
} catch (const std::regex_error& rex) {
|
||||
g_warning ("invalid personal address regexp '%s': %s",
|
||||
p.c_str(), rex.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
ContactUMap
|
||||
Contacts::Private::deserialize(const std::string& serialized) const
|
||||
{
|
||||
|
@ -131,15 +181,14 @@ Contacts::Private::deserialize(const std::string& serialized) const
|
|||
(std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq
|
||||
|
||||
contacts.emplace(std::move(parts[1]), std::move(ci));
|
||||
|
||||
}
|
||||
|
||||
return contacts;
|
||||
}
|
||||
|
||||
|
||||
Contacts::Contacts (const std::string& serialized) :
|
||||
priv_{std::make_unique<Private>(serialized)}
|
||||
Contacts::Contacts (const std::string& serialized, const StringVec& personal) :
|
||||
priv_{std::make_unique<Private>(serialized, personal)}
|
||||
{}
|
||||
|
||||
Contacts::~Contacts() = default;
|
||||
|
@ -170,44 +219,42 @@ Contacts::serialize() const
|
|||
}
|
||||
|
||||
|
||||
// Remove every newline character from str, in place; all other
// characters are left untouched (for now, newlines are the only thing
// we care about not having).
static void
wash (std::string& str)
{
	const auto tail = std::remove_if(str.begin(), str.end(),
					 [](char ch) { return ch == '\n'; });
	str.erase(tail, str.end());
}
|
||||
|
||||
|
||||
void
|
||||
Contacts::add (ContactInfo&& ci)
|
||||
{
|
||||
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
||||
|
||||
auto down = g_ascii_strdown (ci.email.c_str(), -1);
|
||||
std::string email{down};
|
||||
g_free(down);
|
||||
auto it = priv_->contacts_.find(ci.email);
|
||||
|
||||
auto it = priv_->contacts_.find(email);
|
||||
if (it != priv_->contacts_.end()) {
|
||||
auto& ci2 = it->second;
|
||||
++ci2.freq;
|
||||
if (ci.last_seen > ci2.last_seen) {
|
||||
ci2.last_seen = ci.last_seen;
|
||||
wash(ci.email);
|
||||
ci2.email = std::move(ci.email);
|
||||
if (!ci.name.empty()) {
|
||||
wash(ci.name);
|
||||
ci2.name = std::move(ci.name);
|
||||
}
|
||||
if (it == priv_->contacts_.end()) { // completely new contact
|
||||
wash(ci.name);
|
||||
wash(ci.full_address);
|
||||
ci.freq = 1;
|
||||
ci.personal = is_personal(ci.email);
|
||||
auto email{ci.email};
|
||||
priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(ci)));
|
||||
} else { // existing contact.
|
||||
auto& ci_existing{it->second};
|
||||
++ci_existing.freq;
|
||||
|
||||
if (ci.last_seen > ci_existing.last_seen) {
|
||||
// update.
|
||||
wash(ci.name);
|
||||
ci_existing.name = std::move(ci.name);
|
||||
|
||||
ci_existing.email = std::move(ci.email);
|
||||
|
||||
wash(ci.full_address);
|
||||
ci_existing.full_address = std::move(ci.full_address);
|
||||
ci_existing.tstamp = g_get_monotonic_time();
|
||||
}
|
||||
}
|
||||
|
||||
wash(ci.name);
|
||||
wash(ci.email);
|
||||
wash(ci.full_address);
|
||||
|
||||
priv_->contacts_.emplace(
|
||||
ContactUMap::value_type(std::move(email), std::move(ci)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -216,8 +263,7 @@ Contacts::_find (const std::string& email) const
|
|||
{
|
||||
std::lock_guard<std::mutex> l_{priv_->mtx_};
|
||||
|
||||
ContactInfo ci{"", email, "", false, 0};
|
||||
const auto it = priv_->contacts_.find(ci.email);
|
||||
const auto it = priv_->contacts_.find(email);
|
||||
if (it == priv_->contacts_.end())
|
||||
return {};
|
||||
else
|
||||
|
@ -260,6 +306,23 @@ Contacts::for_each(const EachContactFunc& each_contact) const
|
|||
each_contact (ci);
|
||||
}
|
||||
|
||||
bool
|
||||
Contacts::is_personal(const std::string& addr) const
|
||||
{
|
||||
for (auto&& p: priv_->personal_plain_)
|
||||
if (g_ascii_strcasecmp(addr.c_str(), p.c_str()) == 0)
|
||||
return true;
|
||||
|
||||
for (auto&& rx: priv_->personal_rx_) {
|
||||
std::smatch m; // perhaps cache addr in personal_plain_?
|
||||
if (std::regex_match(addr, m, rx))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/// C binding
|
||||
|
||||
size_t
|
||||
|
|
|
@ -34,6 +34,7 @@ typedef struct _MuContacts MuContacts;
|
|||
#include <string>
|
||||
#include <time.h>
|
||||
#include <inttypes.h>
|
||||
#include <utils/mu-utils.hh>
|
||||
|
||||
namespace Mu {
|
||||
|
||||
|
@ -46,25 +47,38 @@ struct ContactInfo {
|
|||
* @param _full_address the full email address + name.
|
||||
* @param _email email address
|
||||
* @param _name name or empty
|
||||
* @param _personal is this a personal contact?
|
||||
* @param _last_seen when was this contact last seen?
|
||||
* @param _freq how often was this contact seen?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
ContactInfo (const std::string& _full_address,
|
||||
const std::string& _email,
|
||||
const std::string& _name,
|
||||
bool _personal, time_t _last_seen, size_t _freq=1);
|
||||
time_t _last_seen);
|
||||
|
||||
/**
|
||||
* Construct a new ContactInfo
|
||||
*
|
||||
* @param _full_address the full email address + name.
|
||||
* @param _email email address
|
||||
* @param _name name or empty
|
||||
* @param _personal is this a personal contact?
|
||||
* @param _last_seen when was this contact last seen?
|
||||
* @param _freq how often was this contact seen?
|
||||
*/
|
||||
ContactInfo (const std::string& _full_address,
|
||||
const std::string& _email,
|
||||
const std::string& _name,
|
||||
bool personal,
|
||||
time_t _last_seen,
|
||||
size_t freq);
|
||||
|
||||
std::string full_address; /**< Full name <email> */
|
||||
std::string email; /**< email address */
|
||||
std::string name; /**< name (or empty) */
|
||||
bool personal; /**< is this a personal contact? */
|
||||
time_t last_seen; /**< when was this contact last seen? */
|
||||
std::size_t freq; /**< how often was this contact seen? */
|
||||
bool personal{}; /**< is this a personal contact? */
|
||||
time_t last_seen{}; /**< when was this contact last seen? */
|
||||
std::size_t freq{}; /**< how often was this contact seen? */
|
||||
|
||||
int64_t tstamp; /**< Time-stamp, as per g_get_monotonic_time */
|
||||
int64_t tstamp{}; /**< Time-stamp, as per g_get_monotonic_time */
|
||||
};
|
||||
|
||||
/// All contacts
|
||||
|
@ -74,8 +88,10 @@ public:
|
|||
* Construct a new contacts object
|
||||
*
|
||||
* @param serialized serialized contacts
|
||||
* @param personal personal addresses
|
||||
*/
|
||||
Contacts (const std::string& serialized = "");
|
||||
Contacts (const std::string& serialized = "",
|
||||
const StringVec& personal={});
|
||||
|
||||
/**
|
||||
* DTOR
|
||||
|
@ -118,6 +134,16 @@ public:
|
|||
*/
|
||||
std::string serialize() const;
|
||||
|
||||
|
||||
/**
|
||||
* Does this look like a 'personal' address?
|
||||
*
|
||||
* @param addr some e-mail address
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
bool is_personal(const std::string& addr) const;
|
||||
|
||||
/**
|
||||
* Find a contact based on the email address. This is not safe, since
|
||||
* the returned ptr can be invalidated at any time; only for unit-tests.
|
||||
|
|
|
@ -114,7 +114,7 @@ struct Store::Private {
|
|||
Private (const std::string& path, bool readonly):
|
||||
db_{make_xapian(path, readonly ? XapianOpts::ReadOnly : XapianOpts::Open)},
|
||||
mdata_{make_metadata(path)},
|
||||
contacts_{db()->get_metadata(ContactsKey)} {
|
||||
contacts_{db()->get_metadata(ContactsKey), mdata_.personal_addresses} {
|
||||
|
||||
if (!readonly)
|
||||
wdb()->begin_transaction();
|
||||
|
@ -123,7 +123,8 @@ struct Store::Private {
|
|||
Private (const std::string& path, const std::string& root_maildir,
|
||||
const StringVec& personal_addresses, const Store::Config& conf):
|
||||
db_{make_xapian(path, XapianOpts::CreateOverwrite)},
|
||||
mdata_{init_metadata(conf, path, root_maildir, personal_addresses)} {
|
||||
mdata_{init_metadata(conf, path, root_maildir, personal_addresses)},
|
||||
contacts_{"", mdata_.personal_addresses} {
|
||||
|
||||
wdb()->begin_transaction();
|
||||
}
|
||||
|
@ -307,7 +308,6 @@ Store::metadata() const
|
|||
const Contacts&
|
||||
Store::contacts() const
|
||||
{
|
||||
LOCKED;
|
||||
return priv_->contacts_;
|
||||
}
|
||||
|
||||
|
@ -1045,32 +1045,11 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
|
|||
contacts.add(Mu::ContactInfo(contact->full_address,
|
||||
contact->email,
|
||||
contact->name ? contact->name : "",
|
||||
msgdoc->_personal,
|
||||
mu_msg_get_date(msgdoc->_msg)));
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/* Contact callback: flag the message document as personal when the
 * contact's e-mail address equals, ignoring ASCII case, one of the
 * addresses in msgdoc->_my_addresses. Nothing is done when the document
 * is already flagged or the contact has no e-mail address.
 *
 * Always returns TRUE. */
static gboolean
each_contact_check_if_personal (MuMsgContact *contact, MsgDoc *msgdoc)
{
	if (msgdoc->_personal)
		return TRUE; /* already decided */
	if (!contact->email)
		return TRUE; /* nothing to compare */

	for (const auto& mine : *msgdoc->_my_addresses) {
		if (g_ascii_strcasecmp (contact->email,
					(const char*)mine.c_str()) != 0)
			continue;

		/* first hit settles it */
		msgdoc->_personal = TRUE;
		return TRUE;
	}

	return TRUE;
}
|
||||
|
||||
static Xapian::Document
|
||||
new_doc_from_message (MuStore *store, MuMsg *msg)
|
||||
{
|
||||
|
@ -1079,17 +1058,20 @@ new_doc_from_message (MuStore *store, MuMsg *msg)
|
|||
|
||||
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo);
|
||||
|
||||
/* determine whether this is 'personal' email, ie. one of my
|
||||
* e-mail addresses is explicitly mentioned -- it's not a
|
||||
* mailing list message. Callback will update docinfo->_personal */
|
||||
const auto& personal_addresses = self(store)->metadata().personal_addresses;
|
||||
if (personal_addresses.size()) {
|
||||
docinfo._my_addresses = &personal_addresses;
|
||||
mu_msg_contact_foreach
|
||||
(msg,
|
||||
(MuMsgContactForeachFunc)each_contact_check_if_personal,
|
||||
&docinfo);
|
||||
}
|
||||
mu_msg_contact_foreach
|
||||
(msg, [](auto contact, gpointer msgdocptr)->gboolean {
|
||||
auto msgdoc{reinterpret_cast<MsgDoc*>(msgdocptr)};
|
||||
|
||||
if (!contact->email)
|
||||
return FALSE; // invalid contact
|
||||
else if (msgdoc->_personal)
|
||||
return TRUE; // already deemed personal
|
||||
|
||||
if (msgdoc->_store->contacts().is_personal(contact->email))
|
||||
msgdoc->_personal = true; // this one's personal.
|
||||
|
||||
return TRUE;
|
||||
}, &docinfo);
|
||||
|
||||
/* also store the contact-info as separate terms, and add it
|
||||
* to the cache */
|
||||
|
|
|
@ -96,8 +96,6 @@ public:
|
|||
* @return the metadata
|
||||
*/
|
||||
const Metadata& metadata() const;
|
||||
|
||||
|
||||
/**
|
||||
* Get the Contacts object for this store
|
||||
*
|
||||
|
@ -105,7 +103,6 @@ public:
|
|||
*/
|
||||
const Contacts& contacts() const;
|
||||
|
||||
|
||||
/**
|
||||
* Get the Indexer associated with this store. It is an error
|
||||
* to call this on a read-only store.
|
||||
|
@ -177,7 +174,6 @@ public:
|
|||
*/
|
||||
bool contains_message (const std::string& path) const;
|
||||
|
||||
|
||||
/**
|
||||
* Prototype for the ForEachFunc
|
||||
*
|
||||
|
|
|
@ -33,25 +33,21 @@ test_mu_contacts_01()
|
|||
g_assert_cmpuint (contacts.size(), ==, 0);
|
||||
|
||||
contacts.add(std::move(Mu::ContactInfo ("Foo <foo.bar@example.com>",
|
||||
"foo.bar@example.com", "Foo",
|
||||
false, 12345)));
|
||||
"foo.bar@example.com", "Foo", 12345)));
|
||||
g_assert_false (contacts.empty());
|
||||
g_assert_cmpuint (contacts.size(), ==, 1);
|
||||
|
||||
contacts.add(std::move(Mu::ContactInfo ("Cuux <cuux.fnorb@example.com>",
|
||||
"cuux@example.com", "Cuux", true,
|
||||
54321)));
|
||||
"cuux@example.com", "Cuux", 54321)));
|
||||
|
||||
g_assert_cmpuint (contacts.size(), ==, 2);
|
||||
|
||||
contacts.add(std::move(Mu::ContactInfo ("foo.bar@example.com",
|
||||
"foo.bar@example.com", "Foo",
|
||||
false, 77777)));
|
||||
"foo.bar@example.com", "Foo", 77777)));
|
||||
g_assert_cmpuint (contacts.size(), ==, 2);
|
||||
|
||||
contacts.add(std::move(Mu::ContactInfo ("Foo.Bar@Example.Com",
|
||||
"Foo.Bar@Example.Com", "Foo",
|
||||
false, 88888)));
|
||||
"Foo.Bar@Example.Com", "Foo", 88888)));
|
||||
g_assert_cmpuint (contacts.size(), ==, 2);
|
||||
// note: replaces first.
|
||||
|
||||
|
@ -60,7 +56,6 @@ test_mu_contacts_01()
|
|||
g_assert_false (info);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
const auto info = contacts._find("foo.BAR@example.com");
|
||||
g_assert_true (info);
|
||||
|
@ -73,6 +68,27 @@ test_mu_contacts_01()
|
|||
g_assert_cmpuint (contacts.size(), ==, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
test_mu_contacts_02()
|
||||
{
|
||||
Mu::StringVec personal = {
|
||||
"foo@example.com",
|
||||
"bar@cuux.org",
|
||||
"/bar-.*@fnorb.f./"
|
||||
};
|
||||
Mu::Contacts contacts{"", personal};
|
||||
|
||||
g_assert_true (contacts.is_personal("foo@example.com"));
|
||||
g_assert_true (contacts.is_personal("Bar@CuuX.orG"));
|
||||
g_assert_true (contacts.is_personal("bar-123abc@fnorb.fi"));
|
||||
g_assert_true (contacts.is_personal("bar-zzz@fnorb.fr"));
|
||||
|
||||
g_assert_false (contacts.is_personal("foo@bar.com"));
|
||||
g_assert_false (contacts.is_personal("BÂr@CuuX.orG"));
|
||||
g_assert_false (contacts.is_personal("bar@fnorb.fi"));
|
||||
g_assert_false (contacts.is_personal("bar-zzz@fnorb.xr"));
|
||||
}
|
||||
|
||||
|
||||
|
||||
int
|
||||
|
@ -81,6 +97,7 @@ main (int argc, char *argv[])
|
|||
g_test_init (&argc, &argv, NULL);
|
||||
|
||||
g_test_add_func ("/mu-contacts/01", test_mu_contacts_01);
|
||||
g_test_add_func ("/mu-contacts/02", test_mu_contacts_02);
|
||||
|
||||
g_log_set_handler (NULL,
|
||||
(GLogLevelFlags)
|
||||
|
|
|
@ -79,7 +79,6 @@ test_store_add_count_remove ()
|
|||
}
|
||||
|
||||
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH MU-INIT 1 "February 2020" "User Manuals"
|
||||
.TH MU-INIT 1 "October 2020" "User Manuals"
|
||||
|
||||
.SH NAME
|
||||
|
||||
|
@ -10,13 +10,14 @@ mu init \- initialize the mu message database
|
|||
|
||||
.SH DESCRIPTION
|
||||
|
||||
\fBmu init\fR is the \fBmu\fR command for setting up the mu message
|
||||
database. After \fBmu init\fR has completed, you can run \fBmu index\fR
|
||||
\fBmu init\fR is the subcommand for setting up the mu message
|
||||
database. After \fBmu init\fR has completed, you can run \fBmu
|
||||
index\fR
|
||||
|
||||
.SH OPTIONS
|
||||
|
||||
Note, some of the general options are described in the \fBmu(1)\fR man-page and
|
||||
not here, as they apply to multiple mu commands.
|
||||
Note, some of the general options are described in the \fBmu(1)\fR
|
||||
man-page and not here, as they apply to multiple mu commands.
|
||||
|
||||
.TP
|
||||
\fB\-\-muhome\fR
|
||||
|
@ -34,7 +35,6 @@ are not supported.
|
|||
|
||||
.TP
|
||||
\fB\-\-my-address\fR=\fI<my-email-address>\fR
|
||||
|
||||
specifies that some e-mail addresses are 'my-address' (\fB\-\-my-address\fR can
|
||||
be used multiple times). This is used by \fBmu cfind\fR -- any e-mail address
|
||||
found in the address fields of a message which also has \fI<my-email-address>\fR
|
||||
|
@ -42,6 +42,10 @@ in one of its address fields is considered a \fIpersonal\fR e-mail address. This
|
|||
allows you, for example, to filter out (\fBmu cfind --personal\fR) addresses
|
||||
which were merely seen in mailing list messages.
|
||||
|
||||
\fI<my-email-address>\fR can be either a plain e-mail address (such as
|
||||
\fBfoo@example.com\fR), or a regular-expression (of the 'Basic POSIX'
|
||||
flavor), wrapped in \fB/\fR (such as \fB/foo-.*@example\\.com/\fR).
|
||||
|
||||
.SH ENVIRONMENT
|
||||
|
||||
\fBmu init\fR uses \fBMAILDIR\fR to find the user's Maildir if it has not been
|
||||
|
|
Loading…
Reference in New Issue