mirror of https://github.com/djcb/mu.git
document: index some sub-parts as well
1. Also add 'normal' terms for some indexable fields. 2. Add terms for e-mail address components. Also add some tests. This helps with some corner-case queries (see tests). Fixes #2278. Fixes #2281.
This commit is contained in:
parent
6cb38c8125
commit
df80935c2e
|
@ -30,7 +30,6 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utils/mu-utils.hh>
|
#include <utils/mu-utils.hh>
|
||||||
|
|
||||||
|
|
||||||
using namespace Mu;
|
using namespace Mu;
|
||||||
|
|
||||||
constexpr uint8_t SepaChar1 = 0xfe;
|
constexpr uint8_t SepaChar1 = 0xfe;
|
||||||
|
@ -46,7 +45,13 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
|
||||||
} else if (field.is_indexable_term()) {
|
} else if (field.is_indexable_term()) {
|
||||||
Xapian::TermGenerator termgen;
|
Xapian::TermGenerator termgen;
|
||||||
termgen.set_document(doc);
|
termgen.set_document(doc);
|
||||||
termgen.index_text(utf8_flatten(val),1,field.xapian_term());
|
termgen.index_text(utf8_flatten(val), 1, field.xapian_term());
|
||||||
|
/* also add as 'normal' term, so some queries where the indexer
|
||||||
|
* eats special chars also match */
|
||||||
|
if (field.id != Field::Id::BodyText &&
|
||||||
|
field.id != Field::Id::EmbeddedText) {
|
||||||
|
doc.add_term(field.xapian_term(val));
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
throw std::logic_error("not a search term");
|
throw std::logic_error("not a search term");
|
||||||
}
|
}
|
||||||
|
@ -143,12 +148,19 @@ Document::add(Field::Id id, const Contacts& contacts)
|
||||||
if (!cfield_id || *cfield_id != id)
|
if (!cfield_id || *cfield_id != id)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
xdoc_.add_term(field.xapian_term(contact.email));
|
const auto e{contact.email};
|
||||||
|
xdoc_.add_term(field.xapian_term(e));
|
||||||
|
|
||||||
|
/* allow searching for address components, too */
|
||||||
|
const auto atpos = e.find('@');
|
||||||
|
if (atpos != std::string::npos && atpos < e.size() - 1) {
|
||||||
|
xdoc_.add_term(field.xapian_term(e.substr(0, atpos)));
|
||||||
|
xdoc_.add_term(field.xapian_term(e.substr(atpos + 1)));
|
||||||
|
}
|
||||||
|
|
||||||
if (!contact.name.empty())
|
if (!contact.name.empty())
|
||||||
termgen.index_text(utf8_flatten(contact.name), 1,
|
termgen.index_text(utf8_flatten(contact.name), 1,
|
||||||
field.xapian_term());
|
field.xapian_term());
|
||||||
|
|
||||||
cvec.emplace_back(contact.email + sepa2 + contact.name);
|
cvec.emplace_back(contact.email + sepa2 + contact.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -81,7 +81,6 @@ make_test_store(const std::string& test_path, const TestMap& test_map,
|
||||||
static void
|
static void
|
||||||
test_simple()
|
test_simple()
|
||||||
{
|
{
|
||||||
|
|
||||||
const TestMap test_msgs = {{
|
const TestMap test_msgs = {{
|
||||||
|
|
||||||
// "sqlite-msg" "Simple mailing list message.
|
// "sqlite-msg" "Simple mailing list message.
|
||||||
|
@ -157,13 +156,59 @@ I said: "Aujourd'hui!"
|
||||||
//g_assert_cmpuint(qr->begin().date().value_or(0), ==, 123454);
|
//g_assert_cmpuint(qr->begin().date().value_or(0), ==, 123454);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_spam_address_components()
|
||||||
|
{
|
||||||
|
const TestMap test_msgs = {{
|
||||||
|
|
||||||
|
// "spam-msg": message with a "***SPAM***"-tagged subject and distinct From:/To: addresses.
|
||||||
|
{
|
||||||
|
"spam/cur/spam-msg:2,S",
|
||||||
|
R"(Message-Id: <abcde@foo.bar>
|
||||||
|
From: "Foo Example" <bar@example.com>
|
||||||
|
To: example@example.com
|
||||||
|
Subject: ***SPAM*** this is a test
|
||||||
|
|
||||||
|
Boo!
|
||||||
|
)"},
|
||||||
|
}};
|
||||||
|
TempDir tdir;
|
||||||
|
auto store{make_test_store(tdir.path(), test_msgs, {})};
|
||||||
|
|
||||||
|
g_test_bug("2278");
|
||||||
|
g_test_bug("2281");
|
||||||
|
|
||||||
|
// each of these queries must match the single test message
|
||||||
|
for (auto&& expr: {
|
||||||
|
"SPAM",
|
||||||
|
"spam",
|
||||||
|
"/.*SPAM.*/",
|
||||||
|
"subject:SPAM",
|
||||||
|
"from:bar@example.com",
|
||||||
|
"subject:\\*\\*\\*SPAM\\*\\*\\*",
|
||||||
|
"bar",
|
||||||
|
"example.com"
|
||||||
|
}) {
|
||||||
|
|
||||||
|
if (g_test_verbose())
|
||||||
|
g_message("query: '%s'", expr);
|
||||||
|
auto qr = store.run_query(expr);
|
||||||
|
assert_valid_result(qr);
|
||||||
|
g_assert_false(qr->empty());
|
||||||
|
g_assert_cmpuint(qr->size(), ==, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char* argv[])
|
main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
g_test_init(&argc, &argv, NULL);
|
g_test_init(&argc, &argv, NULL);
|
||||||
|
|
||||||
g_test_add_func("/store/query/simple", test_simple);
|
g_test_bug_base("https://github.com/djcb/mu/issues/");
|
||||||
|
|
||||||
|
g_test_add_func("/store/query/simple", test_simple);
|
||||||
|
g_test_add_func("/store/query/spam-address-components",
|
||||||
|
test_spam_address_components);
|
||||||
|
|
||||||
return g_test_run();
|
return g_test_run();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue