mirror of https://github.com/djcb/mu.git
document: index some sub-parts as well
1. Also add 'normal' terms for some indexable fields. 2. Add terms for e-mail address components. Also add some tests. This helps with some corner-case queries (see tests). Fixes #2278. Fixes #2281.
This commit is contained in:
parent
6cb38c8125
commit
df80935c2e
|
@ -30,7 +30,6 @@
|
|||
#include <string>
|
||||
#include <utils/mu-utils.hh>
|
||||
|
||||
|
||||
using namespace Mu;
|
||||
|
||||
constexpr uint8_t SepaChar1 = 0xfe;
|
||||
|
@ -46,7 +45,13 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
|
|||
} else if (field.is_indexable_term()) {
|
||||
Xapian::TermGenerator termgen;
|
||||
termgen.set_document(doc);
|
||||
termgen.index_text(utf8_flatten(val),1,field.xapian_term());
|
||||
termgen.index_text(utf8_flatten(val), 1, field.xapian_term());
|
||||
/* also add as 'normal' term, so some queries where the indexer
|
||||
* eats special chars also match */
|
||||
if (field.id != Field::Id::BodyText &&
|
||||
field.id != Field::Id::EmbeddedText) {
|
||||
doc.add_term(field.xapian_term(val));
|
||||
}
|
||||
} else
|
||||
throw std::logic_error("not a search term");
|
||||
}
|
||||
|
@ -143,12 +148,19 @@ Document::add(Field::Id id, const Contacts& contacts)
|
|||
if (!cfield_id || *cfield_id != id)
|
||||
continue;
|
||||
|
||||
xdoc_.add_term(field.xapian_term(contact.email));
|
||||
const auto e{contact.email};
|
||||
xdoc_.add_term(field.xapian_term(e));
|
||||
|
||||
/* allow searching for address components, too */
|
||||
const auto atpos = e.find('@');
|
||||
if (atpos != std::string::npos && atpos < e.size() - 1) {
|
||||
xdoc_.add_term(field.xapian_term(e.substr(0, atpos)));
|
||||
xdoc_.add_term(field.xapian_term(e.substr(atpos + 1)));
|
||||
}
|
||||
|
||||
if (!contact.name.empty())
|
||||
termgen.index_text(utf8_flatten(contact.name), 1,
|
||||
field.xapian_term());
|
||||
|
||||
cvec.emplace_back(contact.email + sepa2 + contact.name);
|
||||
}
|
||||
|
||||
|
|
|
@ -81,7 +81,6 @@ make_test_store(const std::string& test_path, const TestMap& test_map,
|
|||
static void
|
||||
test_simple()
|
||||
{
|
||||
|
||||
const TestMap test_msgs = {{
|
||||
|
||||
// "sqlite-msg" "Simple mailing list message.
|
||||
|
@ -157,13 +156,59 @@ I said: "Aujourd'hui!"
|
|||
//g_assert_cmpuint(qr->begin().date().value_or(0), ==, 123454);
|
||||
}
|
||||
|
||||
// Regression test for issues #2278 and #2281: builds a test store holding a
// single message whose subject is "***SPAM*** this is a test", then checks
// that every query in the list below returns exactly that one message.
static void
|
||||
test_spam_address_components()
|
||||
{
|
||||
const TestMap test_msgs = {{
|
||||
|
||||
// the only message in the store; its From/To/Subject headers are what the queries below target
|
||||
{
|
||||
"spam/cur/spam-msg:2,S",
|
||||
R"(Message-Id: <abcde@foo.bar>
|
||||
From: "Foo Example" <bar@example.com>
|
||||
|
||||
To: example@example.com
|
||||
Subject: ***SPAM*** this is a test
|
||||
|
||||
Boo!
|
||||
)"},
|
||||
}};
|
||||
TempDir tdir;
|
||||
auto store{make_test_store(tdir.path(), test_msgs, {})};
|
||||
|
||||
// record the issue numbers this test covers
g_test_bug("2278");
|
||||
g_test_bug("2281");
|
||||
|
||||
// each of these queries must match the single test message
|
||||
for (auto&& expr: {
|
||||
"SPAM",
|
||||
"spam",
|
||||
"/.*SPAM.*/",
|
||||
"subject:SPAM",
|
||||
"from:bar@example.com",
|
||||
"subject:\\*\\*\\*SPAM\\*\\*\\*",
|
||||
// bare address components taken from the From:/To: headers above
"bar",
|
||||
"example.com"
|
||||
}) {
|
||||
|
||||
if (g_test_verbose())
|
||||
g_message("query: '%s'", expr);
|
||||
auto qr = store.run_query(expr);
|
||||
assert_valid_result(qr);
|
||||
g_assert_false(qr->empty());
|
||||
// exactly one hit is expected: the store contains a single message
g_assert_cmpuint(qr->size(), ==, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Test entry point: initializes the GLib test framework, sets the base URL
// for bug references, registers the test cases and runs them.
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
g_test_init(&argc, &argv, NULL);
|
||||
|
||||
// NOTE(review): "/store/query/simple" is registered here and again after
// g_test_bug_base() below -- this looks like the removed and the added line
// of the diff both being shown; confirm that only one registration is intended.
g_test_add_func("/store/query/simple", test_simple);
|
||||
// base URL for the issue numbers passed to g_test_bug() in the tests
g_test_bug_base("https://github.com/djcb/mu/issues/");
|
||||
|
||||
g_test_add_func("/store/query/simple", test_simple);
|
||||
g_test_add_func("/store/query/spam-address-components",
|
||||
test_spam_address_components);
|
||||
|
||||
return g_test_run();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue