From a5291e65516fd7476fc8a7a5afe4f85ab795bff1 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Mon, 19 Sep 2022 18:27:03 +0300 Subject: [PATCH] message: convert mime-parts to utf-8 Ensure that non-utf8 mime-parts are converted to utf8. This fixes a problem with messages with such parts; added unit test. Fixes #2333. --- lib/message/mu-mime-object.cc | 17 +++++++- lib/tests/test-mu-store-query.cc | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/lib/message/mu-mime-object.cc b/lib/message/mu-mime-object.cc index 7e5734fd..17b56f0a 100644 --- a/lib/message/mu-mime-object.cc +++ b/lib/message/mu-mime-object.cc @@ -477,10 +477,22 @@ MimePart::size() const noexcept return static_cast(g_mime_stream_length(stream)); } - Option MimePart::to_string() const noexcept { + /* + * easy case: text. this automatically handles conversion to utf-8. + */ + if (GMIME_IS_TEXT_PART(self())) { + if (char* txt{g_mime_text_part_get_text(GMIME_TEXT_PART(self()))}; !txt) + return Nothing; + else + return to_string_gchar(std::move(txt)/*consumes*/); + } + + /* + * harder case: read from stream manually + */ GMimeDataWrapper *wrapper{g_mime_part_get_content(self())}; if (!wrapper) { /* this happens with invalid mails */ g_debug("failed to create data wrapper"); @@ -493,7 +505,6 @@ MimePart::to_string() const noexcept return Nothing; } - ssize_t buflen{g_mime_data_wrapper_write_to_stream(wrapper, stream)}; if (buflen <= 0) { /* empty buffer, not an error */ g_object_unref(stream); @@ -513,9 +524,11 @@ MimePart::to_string() const noexcept buffer.resize(buflen); return buffer; + } + Result MimePart::to_file(const std::string& path, bool overwrite) const noexcept { diff --git a/lib/tests/test-mu-store-query.cc b/lib/tests/test-mu-store-query.cc index 84bbd82d..66496e0b 100644 --- a/lib/tests/test-mu-store-query.cc +++ b/lib/tests/test-mu-store-query.cc @@ -449,6 +449,73 @@ On Thu, Aug 04, 2022 at 05:31:39PM +0100, Robin Murphy wrote: } 
+static void +test_body_matricula() +{ + const TestMap test_msgs = {{ +{ +"basic/cur/matricula-msg:2,S", +R"(From: XXX +Subject: + =?iso-8859-1?Q?EF_-_Pago_matr=EDcula_de_la_matr=EDcula_de_inscripci=F3n_a?= +Date: Thu, 4 Aug 2022 14:29:41 +0000 +Message-ID: + +Accept-Language: es-AR, es-ES, en-US +Content-Language: es-AR +X-MS-Has-Attach: yes +Content-Type: multipart/mixed; + boundary="_004_VE1PR03MB5471882920DE08CFE44D97A0FE9F9VE1PR03MB5471eurp_" +MIME-Version: 1.0 +X-OriginatorOrg: ef.com +X-MS-Exchange-CrossTenant-AuthAs: Internal +X-MS-Exchange-CrossTenant-AuthSource: VE1PR03MB5471.eurprd03.prod.outlook.com + +--_004_VE1PR03MB5471882920DE08CFE44D97A0FE9F9VE1PR03MB5471eurp_ +Content-Type: multipart/alternative; + boundary="_000_VE1PR03MB5471882920DE08CFE44D97A0FE9F9VE1PR03MB5471eurp_" + +--_000_VE1PR03MB5471882920DE08CFE44D97A0FE9F9VE1PR03MB5471eurp_ +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Buenas tardes Familia, + + +Espero que est=E9n muy bien. + + + +Ya cargamos en sistema su pre inscripci=F3n para el curso + + +Quedamos atentos ante cualquier consulta que surja. + +Saludos, +)"}, +}}; + TempDir tdir; + auto store{make_test_store(tdir.path(), test_msgs, {})}; + + /* regression test for bug #2333: non-utf8 text parts (here iso-8859-1, quoted-printable) were not converted to utf-8 */ + g_test_bug("2333"); + + // every query below must be valid, non-empty and yield exactly one result + for (auto&& expr: { + "subject:matrícula", + "subject:matricula", + "body:atentos", + "body:inscripción" + }) { + + if (g_test_verbose()) + g_message("query: '%s'", expr); + auto qr = store.run_query(expr); + assert_valid_result(qr); + g_assert_false(qr->empty()); + g_assert_cmpuint(qr->size(), ==, 1); + } +} int main(int argc, char* argv[]) @@ -464,6 +531,8 @@ main(int argc, char* argv[]) test_dups_related); g_test_add_func("/store/query/related-missing-root", test_related_missing_root); + g_test_add_func("/store/query/body-matricula", + test_body_matricula); return g_test_run(); }