mirror of
https://github.com/djcb/mu.git
synced 2024-06-26 07:29:17 +02:00
mu: index html text as if it were plain text
This is a bit of a hack to include html text in results. Of course, html text is not really plain text, so this is only a stopgap until we introduce some html parsing step.
This commit is contained in:
parent
ea08378ce6
commit
abfa6f277c
|
@ -83,6 +83,16 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va
|
||||||
throw std::logic_error("not a search term");
|
throw std::logic_error("not a search term");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* hack... index html text as if it were plain text. */
|
||||||
|
static void
|
||||||
|
add_body_html(Xapian::Document& doc, const Field& field, const std::string& val)
|
||||||
|
{
|
||||||
|
static Field body_field = field_from_id(Field::Id::BodyText);
|
||||||
|
|
||||||
|
Xapian::TermGenerator termgen;
|
||||||
|
termgen.set_document(doc);
|
||||||
|
termgen.index_text(utf8_flatten(val), 1, body_field.xapian_term());
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Document::add(Field::Id id, const std::string& val)
|
Document::add(Field::Id id, const std::string& val)
|
||||||
|
@ -94,7 +104,8 @@ Document::add(Field::Id id, const std::string& val)
|
||||||
|
|
||||||
if (field.is_searchable())
|
if (field.is_searchable())
|
||||||
add_search_term(xdoc_, field, val);
|
add_search_term(xdoc_, field, val);
|
||||||
|
else if (id == Field::Id::XBodyHtml)
|
||||||
|
add_body_html(xdoc_, field, val);
|
||||||
if (field.include_in_sexp()) {
|
if (field.include_in_sexp()) {
|
||||||
put_prop(field, val);
|
put_prop(field, val);
|
||||||
}
|
}
|
||||||
|
|
|
@ -718,9 +718,60 @@ Boo!
|
||||||
assert_valid_result(qr);
|
assert_valid_result(qr);
|
||||||
g_assert_cmpuint(qr->size(), ==, 3);
|
g_assert_cmpuint(qr->size(), ==, 3);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_html()
|
||||||
|
{
|
||||||
|
// test message with both a text/plain and a text/html part; both must match body: queries.
|
||||||
|
|
||||||
|
const auto test_msg = R"(From: Test <test@example.com>
|
||||||
|
To: abc@example.com
|
||||||
|
Date: Mon, 23 May 2011 10:53:45 +0200
|
||||||
|
Subject: vla
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: multipart/alternative;
|
||||||
|
boundary="_=aspNetEmail=_5ed4592191214c7a99bd7f6a3a0f077d"
|
||||||
|
Message-ID: <10374608.109906.11909.20115aabbccdd.MSGID@mailinglijst.nl>
|
||||||
|
|
||||||
|
--_=aspNetEmail=_5ed4592191214c7a99bd7f6a3a0f077d
|
||||||
|
Content-Type: text/plain; charset="iso-8859-15"
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
|
||||||
|
text
|
||||||
|
|
||||||
|
--_=aspNetEmail=_5ed4592191214c7a99bd7f6a3a0f077d
|
||||||
|
Content-Type: text/html; charset="iso-8859-15"
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
|
||||||
|
html
|
||||||
|
|
||||||
|
--_=aspNetEmail=_5ed4592191214c7a99bd7f6a3a0f077d--
|
||||||
|
)";
|
||||||
|
const TestMap test_msgs = {{"inbox/cur/msg1", test_msg }};
|
||||||
|
|
||||||
|
TempDir tdir;
|
||||||
|
auto store{make_test_store(tdir.path(), test_msgs, {})};
|
||||||
|
g_assert_cmpuint(store.size(), ==, 1);
|
||||||
|
|
||||||
|
{
|
||||||
|
auto qr = store.run_query("body:text", Field::Id::Date,
|
||||||
|
QueryFlags::None);
|
||||||
|
assert_valid_result(qr);
|
||||||
|
g_assert_cmpuint(qr->size(), ==, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto qr = store.run_query("body:html", Field::Id::Date,
|
||||||
|
QueryFlags::None);
|
||||||
|
assert_valid_result(qr);
|
||||||
|
g_assert_cmpuint(qr->size(), ==, 1);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char* argv[])
|
main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
|
@ -745,6 +796,7 @@ main(int argc, char* argv[])
|
||||||
test_term_split);
|
test_term_split);
|
||||||
g_test_add_func("/store/query/related-dup-threaded",
|
g_test_add_func("/store/query/related-dup-threaded",
|
||||||
test_related_dup_threaded);
|
test_related_dup_threaded);
|
||||||
|
g_test_add_func("/store/query/html", test_html);
|
||||||
|
|
||||||
return g_test_run();
|
return g_test_run();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
** Copyright (C) 2008-2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
** Copyright (C) 2008-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
**
|
**
|
||||||
** This program is free software; you can redistribute it and/or modify it
|
** This program is free software; you can redistribute it and/or modify it
|
||||||
** under the terms of the GNU General Public License as published by the
|
** under the terms of the GNU General Public License as published by the
|
||||||
|
@ -150,15 +150,29 @@ test_mu_find_01(void)
|
||||||
static void
|
static void
|
||||||
test_mu_find_02(void)
|
test_mu_find_02(void)
|
||||||
{
|
{
|
||||||
search("bull", 1);
|
/* when matching html as if it were text,
|
||||||
|
* 'bull' is also matched in arto.eml, &bull;
|
||||||
|
*/
|
||||||
|
// search("bull", 1);
|
||||||
|
// search("bull m:foo", 0);
|
||||||
|
// search("bull m:/foo", 1);
|
||||||
|
// search("bull m:/Foo", 1);
|
||||||
|
// search("bull flag:attach", 1);
|
||||||
|
// search("bull flag:a", 1);
|
||||||
|
|
||||||
|
search("bull", 2);
|
||||||
search("bull m:foo", 0);
|
search("bull m:foo", 0);
|
||||||
search("bull m:/foo", 1);
|
search("bull m:/foo", 2);
|
||||||
search("bull m:/Foo", 1);
|
search("bull m:/Foo", 2);
|
||||||
search("bull flag:attach", 1);
|
search("bull flag:attach", 1);
|
||||||
search("bull flag:a", 1);
|
search("bull flag:a", 1);
|
||||||
|
|
||||||
|
|
||||||
search("g:x", 0);
|
search("g:x", 0);
|
||||||
search("flag:encrypted", 0);
|
search("flag:encrypted", 0);
|
||||||
search("flag:attach", 1);
|
search("flag:attach", 1);
|
||||||
|
|
||||||
|
search("i:3BE9E6535E0D852173@emss35m06.us.lmco.com", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -191,17 +205,9 @@ test_mu_find_text_in_rfc822(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* some more tests */
|
/* some more tests */
|
||||||
static void
|
|
||||||
test_mu_find_03(void)
|
|
||||||
{
|
|
||||||
search("bull", 1);
|
|
||||||
search("bull m:foo", 0);
|
|
||||||
search("bull m:/foo", 1);
|
|
||||||
search("i:3BE9E6535E0D852173@emss35m06.us.lmco.com", 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void /* error cases */
|
static void /* error cases */
|
||||||
test_mu_find_04(void)
|
test_mu_find_03()
|
||||||
{
|
{
|
||||||
gchar *cmdline, *erroutput;
|
gchar *cmdline, *erroutput;
|
||||||
|
|
||||||
|
@ -840,7 +846,6 @@ main(int argc, char* argv[])
|
||||||
g_test_add_func("/mu-cmd/test-mu-find-text-in-rfc822", test_mu_find_text_in_rfc822);
|
g_test_add_func("/mu-cmd/test-mu-find-text-in-rfc822", test_mu_find_text_in_rfc822);
|
||||||
|
|
||||||
g_test_add_func("/mu-cmd/test-mu-find-03", test_mu_find_03);
|
g_test_add_func("/mu-cmd/test-mu-find-03", test_mu_find_03);
|
||||||
g_test_add_func("/mu-cmd/test-mu-find-04", test_mu_find_04);
|
|
||||||
g_test_add_func("/mu-cmd/test-mu-find-maildir-special", test_mu_find_maildir_special);
|
g_test_add_func("/mu-cmd/test-mu-find-maildir-special", test_mu_find_maildir_special);
|
||||||
g_test_add_func("/mu-cmd/test-mu-extract-01", test_mu_extract_01);
|
g_test_add_func("/mu-cmd/test-mu-extract-01", test_mu_extract_01);
|
||||||
g_test_add_func("/mu-cmd/test-mu-extract-02", test_mu_extract_02);
|
g_test_add_func("/mu-cmd/test-mu-extract-02", test_mu_extract_02);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user