1
0
mirror of https://github.com/djcb/mu.git synced 2024-06-20 06:46:50 +02:00

mu-contact: move email validation to contacts cache

So we can be sure the regexp is initialized. This _may_ help for
 https://bugzilla.opensuse.org/show_bug.cgi?id=1221861
though it is very hard to tell!
This commit is contained in:
Dirk-Jan C. Binnema 2024-04-01 20:51:40 +03:00
parent 2e40852abc
commit d13feb2d99
4 changed files with 58 additions and 56 deletions

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2022-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2022-2024 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -20,7 +20,6 @@
#include "mu-contact.hh"
#include "mu-message.hh"
#include "utils/mu-utils.hh"
#include "utils/mu-regex.hh"
#include "mu-mime-object.hh"
#include <gmime/gmime.h>
@ -47,34 +46,6 @@ Contact::display_name() const
return Mu::quote(name) + " <" + email + '>';
}
static Regex email_rx;
bool
Contact::has_valid_email() const
{
/* regexp as per:
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
*
* "This requirement is a willful violation of RFC 5322, which defines a
* syntax for email addresses that is simultaneously too strict (before
* the "@" character), too vague (after the "@" character), and too lax
* (allowing comments, whitespace characters, and quoted strings in
* manners unfamiliar to most users) to be of practical use here."
*/
constexpr auto email_rx_str =
R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)";
/* avoid std::regex here, since speed matters. PCRE is faster */
if (!email_rx) {
if (auto&& res = Regex::make(email_rx_str, G_REGEX_OPTIMIZE); !res) {
g_error("BUG: error in regex: %s", res.error().what()); /* terminates process */
} else
email_rx = res.value();
}
return email_rx.matches(email);
}
std::string
Mu::to_string(const Mu::Contacts& contacts)
{
@ -120,7 +91,6 @@ test_ctor_foo()
assert_equal(c.email, "foo@example.com");
assert_equal(c.name, "Foo Bar");
g_assert_true(c.has_valid_email());
g_assert_true(*c.field_id() == Field::Id::Bcc);
g_assert_cmpuint(c.message_date,==,1645214647);
@ -142,7 +112,6 @@ test_ctor_blinky()
assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Blinky");
g_assert_true(c.has_valid_email());
g_assert_true(c.personal);
g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345);
@ -166,7 +135,6 @@ test_ctor_cleanup()
assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Bli ky");
g_assert_true(c.personal);
g_assert_true(c.has_valid_email());
g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345);
g_assert_cmpuint(c.message_date,==,1645215014);
@ -188,7 +156,6 @@ test_encode()
assert_equal(c.email, "cassius@example.com");
assert_equal(c.name, "Ali, Muhammad \"The Greatest\"");
g_assert_true(c.has_valid_email());
g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,333);
g_assert_cmpuint(c.tstamp,==,768);
@ -207,7 +174,6 @@ test_sender()
assert_equal(c.email, "aa@example.com");
assert_equal(c.name, "Anders Ångström");
g_assert_true(c.has_valid_email());
g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,1);
g_assert_cmpuint(c.message_date,==,54321);
@ -222,15 +188,6 @@ test_misc()
g_assert_false(!!contact_type_from_field_id(Field::Id::Subject));
}
static void
test_broken_email()
{
Contact c{"a***@@booa@example..com", "abcdef",
Contact::Type::Sender, 54321};
g_assert_false(c.has_valid_email());
}
int
main(int argc, char* argv[])
{
@ -244,7 +201,6 @@ main(int argc, char* argv[])
g_test_add_func("/message/contact/sender", test_sender);
g_test_add_func("/message/contact/misc", test_misc);
g_test_add_func("/message/contact/broken-email", test_broken_email);
return g_test_run();
}

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2019-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2019-2024 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -53,13 +53,19 @@ struct ContactsCache::Private {
personal_rx_{make_rx_matchers<Config::Id::PersonalAddresses>()},
ignored_plain_{make_matchers<Config::Id::IgnoredAddresses>()},
ignored_rx_{make_rx_matchers<Config::Id::IgnoredAddresses>()},
dirty_{0} {}
dirty_{0},
email_rx_{unwrap(Regex::make(email_rx_str, G_REGEX_OPTIMIZE))}
{}
~Private() { serialize(); }
ContactUMap deserialize(const std::string&) const;
void serialize() const;
bool is_valid_email(const std::string& email) const {
return email_rx_.matches(email);
}
Config& config_db_;
ContactUMap contacts_;
mutable std::mutex mtx_;
@ -70,7 +76,8 @@ struct ContactsCache::Private {
const StringVec ignored_plain_;
const std::vector<Regex> ignored_rx_;
mutable size_t dirty_;
mutable size_t dirty_;
Regex email_rx_;
private:
static bool is_rx(const std::string& p) {
@ -100,6 +107,19 @@ private:
}
return rxvec;
}
/* regexp as per:
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
*
* "This requirement is a willful violation of RFC 5322, which defines a
* syntax for email addresses that is simultaneously too strict (before
* the "@" character), too vague (after the "@" character), and too lax
* (allowing comments, whitespace characters, and quoted strings in
* manners unfamiliar to most users) to be of practical use here."
*/
static constexpr auto email_rx_str =
R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)";
};
ContactUMap
@ -176,7 +196,7 @@ ContactsCache::add(Contact&& contact)
{
/* we do _not_ cache invalid email addresses, so we won't offer them in completions etc. It
* should be _rare_, but we've seen cases ( broken local messages) */
if (!contact.has_valid_email()) {
if (!is_valid(contact.email)) {
mu_warning("not caching invalid e-mail address '{}'", contact.email);
return;
}
@ -346,6 +366,13 @@ ContactsCache::is_ignored(const std::string& addr) const
return address_matches(addr, priv_->ignored_plain_, priv_->ignored_rx_);
}
bool
ContactsCache::is_valid(const std::string& addr) const
{
return priv_->is_valid_email(addr);
}
#ifdef BUILD_TESTS
/*
* Tests.
@ -554,17 +581,28 @@ test_mu_contacts_cache_sort()
}
}
static void
test_mu_contacts_valid_address()
{
MemDb xdb{};
Config cdb{xdb};
ContactsCache ccache(cdb);
g_assert_true(ccache.is_valid("a@example.com"));
g_assert_false(ccache.is_valid("a***@@booa@example..com"));
}
int
main(int argc, char* argv[])
{
mu_test_init(&argc, &argv);
g_test_add_func("/lib/contacts-cache/base", test_mu_contacts_cache_base);
g_test_add_func("/lib/contacts-cache/personal", test_mu_contacts_cache_personal);
g_test_add_func("/lib/contacts-cache/ignored", test_mu_contacts_cache_ignored);
g_test_add_func("/lib/contacts-cache/for-each", test_mu_contacts_cache_foreach);
g_test_add_func("/lib/contacts-cache/sort", test_mu_contacts_cache_sort);
g_test_add_func("/contacts-cache/base", test_mu_contacts_cache_base);
g_test_add_func("/contacts-cache/personal", test_mu_contacts_cache_personal);
g_test_add_func("/contacts-cache/ignored", test_mu_contacts_cache_ignored);
g_test_add_func("/contacts-cache/for-each", test_mu_contacts_cache_foreach);
g_test_add_func("/contacts-cache/sort", test_mu_contacts_cache_sort);
g_test_add_func("/contacts-cache/valid-address", test_mu_contacts_valid_address);
return g_test_run();
}

View File

@ -116,7 +116,6 @@ public:
*/
bool is_personal(const std::string& addr) const;
/**
* Does this look like an email-address that should be ignored?
*
@ -126,6 +125,15 @@ public:
*/
bool is_ignored(const std::string& addr) const;
/**
* Does this look like a valid email-address?
*
* @param addr some e-mail address
*
* @return true or false
*/
bool is_valid(const std::string& addr) const;
/**
* Find a contact based on the email address. This is not safe, since
* the returned ptr can be invalidated at any time; only for unit-tests.

View File

@ -278,7 +278,7 @@ Mu::mu_cmd_cfind(const Mu::Store& store, const Mu::Options& opts)
if (opts.cfind.maxnum && num > *opts.cfind.maxnum)
return false; /* stop the loop */
if (!contact.has_valid_email())
if (!store.contacts_cache().is_valid(contact.email))
return true; /* next */
// filter for maxnum, personal & "after"