From b1013d8f0f50c618f21fea7a85ad7af62c3f095b Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Thu, 11 Aug 2022 23:01:29 +0300 Subject: [PATCH] message: update references() implementation Clean up the implementation at bit, and filter out 'fake' message-ids, such as the ones from protonmail. Update documentation. Add Mu::Message::thread_id(). This fixes #2312. --- lib/message/mu-message.hh | 21 +++++++++++---- lib/message/mu-mime-object.cc | 32 ++++++++++++++--------- lib/message/mu-mime-object.hh | 4 ++- lib/message/test-mu-message.cc | 48 ++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 19 deletions(-) diff --git a/lib/message/mu-message.hh b/lib/message/mu-message.hh index 043ef424..3f2e001b 100644 --- a/lib/message/mu-message.hh +++ b/lib/message/mu-message.hh @@ -301,10 +301,11 @@ public: size_t size() const { return static_cast(document().integer_value(Field::Id::Size)); } /** - * get the list of references (consisting of both the References and - * In-Reply-To fields), with the oldest first and the direct parent as - * the last one. Note, any reference (message-id) will appear at most - * once, duplicates are filtered out. + * Get the (possibly empty) list of references (consisting of both the + * References and In-Reply-To fields), with the oldest first and the + * direct parent as the last one. Note, any reference (message-id) will + * appear at most once, duplicates and fake-message-id (see impls) are + * filtered out. * * @return a vec with the references for this msg. */ @@ -312,6 +313,17 @@ public: return document().string_vec_value(Field::Id::References); } + /** + * Get the thread-id for this message. This is the message-id of the + * oldest-known (grand) parent, or the message-id of this message if + * none. + * + * @return the thread id. + */ + std::string thread_id() const { + return document().string_value(Field::Id::ThreadId); + } + /** * get the list of tags (ie., X-Label) * @@ -409,7 +421,6 @@ public: using Part = MessagePart; const std::vector& parts() const; - /** * Get the path to a cche directory for this message, which * is useful for temporarily saving attachments diff --git a/lib/message/mu-mime-object.cc b/lib/message/mu-mime-object.cc index 1b0783b4..7e5734fd 100644 --- a/lib/message/mu-mime-object.cc +++ b/lib/message/mu-mime-object.cc @@ -397,22 +397,31 @@ MimeMessage::contacts(Contact::Type ctype) const noexcept return contacts; } - - +/* + * references() returns the concatenation of the References and In-Reply-To + * message-ids (in that order). Duplicates are removed. + * + * The _first_ one in the list determines the thread-id for the message. + */ std::vector MimeMessage::references() const noexcept { - constexpr std::array ref_headers = { - "References", "In-reply-to", - }; - - // is ref already in the list? + // is ref already in the list? O(n) but with small n. auto is_dup = [](auto&& seq, const std::string& ref) { return seq_some(seq, [&](auto&& str) { return ref == str; }); }; + auto is_fake = [](auto&& msgid) { + // this is bit ugly; protonmail injects fake References which + // can otherwise screw up threading. + if (g_str_has_suffix(msgid, "protonmail.internalid")) + return true; + /* ... */ + return false; + }; + std::vector refs; - for (auto&& ref_header: ref_headers) { + for (auto&& ref_header: { "References", "In-reply-to" }) { auto hdr{header(ref_header)}; if (!hdr) @@ -422,12 +431,9 @@ MimeMessage::references() const noexcept refs.reserve(refs.size() + g_mime_references_length(mime_refs)); for (auto i = 0; i != g_mime_references_length(mime_refs); ++i) { - const auto msgid{g_mime_references_get_message_id(mime_refs, i)}; - if (!msgid || is_dup(refs, msgid)) - continue; // invalid or skip dups - - refs.emplace_back(msgid); + if (msgid && !is_dup(refs, msgid) && !is_fake(msgid)) + refs.emplace_back(msgid); } g_mime_references_free(mime_refs); } diff --git a/lib/message/mu-mime-object.hh b/lib/message/mu-mime-object.hh index 1eee7567..56a4b56c 100644 --- a/lib/message/mu-mime-object.hh +++ b/lib/message/mu-mime-object.hh @@ -973,7 +973,9 @@ public: /** * Get the references for this message (including in-reply-to), in the - * order of older..newer; in-reply-to would be the last one. + * order of older..newer; the first one would the oldest parent, and + * in-reply-to would be the last one (if any). These are de-duplicated, + * and known-fake references removed (see implementation) * * @return references. */ diff --git a/lib/message/test-mu-message.cc b/lib/message/test-mu-message.cc index 8d85fae2..6d19488e 100644 --- a/lib/message/test-mu-message.cc +++ b/lib/message/test-mu-message.cc @@ -808,6 +808,52 @@ RU5EOlZFVkVOVA0KRU5EOlZDQUxFTkRBUg0K } +static void +test_message_references() +{ + constexpr auto msgtext = +R"(Content-Transfer-Encoding: quoted-printable +Content-Type: text/plain; charset=utf-8 +References: + <90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com> + +To: "Robin Murphy" +Reply-To: "Dan Carpenter" +From: "Dan Carpenter" +Subject: Re: [PATCH] iommu/omap: fix buffer overflow in debugfs +List-Id: +Date: Fri, 5 Aug 2022 09:37:02 +0300 +In-Reply-To: <90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com> +Precedence: bulk +Message-Id: <20220805063702.GH3438@kadam> + +On Thu, Aug 04, 2022 at 05:31:39PM +0100, Robin Murphy wrote: +> On 04/08/2022 3:32 pm, Dan Carpenter wrote: +> > There are two issues here: +)"; + auto message{Message::make_from_text( + msgtext, + "/home/test/Maildir/inbox/cur/162342449279256.88888_1.evergrey:2,S")}; + g_assert_true(!!message); + assert_equal(message->subject(), + "Re: [PATCH] iommu/omap: fix buffer overflow in debugfs"); + g_assert_true(message->priority() == Priority::Low); + + /* + * "90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com" is seen both in + * references and in-reply-to; in the de-duplication, the first one wins. + */ + std::vector expected_refs = { + "YuvYh1JbE3v+abd5@kili", + "90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com", + /* protonmail.internalid is fake and removed */ + // "T4CDWjUrgtI5n4mh1JEdW6RLYzqbPE9-yDrhEVwDM22WX-198fBwcnLd-4_" + // "xR1gvsVSHQps9fp_pZevTF0ZmaA==@protonmail.internalid" + }; + + assert_equal_seq_str(expected_refs, message->references()); +} + static void test_message_fail () @@ -850,6 +896,8 @@ main(int argc, char* argv[]) test_message_detect_attachment); g_test_add_func("/message/message/calendar", test_message_calendar); + g_test_add_func("/message/message/references", + test_message_references); g_test_add_func("/message/message/fail", test_message_fail); g_test_add_func("/message/message/sanitize-maildir",