message: update references() implementation

Clean up the implementation at bit, and filter out 'fake' message-ids,
such as the ones from protonmail.

Update documentation.

Add Mu::Message::thread_id().

This fixes #2312.
This commit is contained in:
Dirk-Jan C. Binnema 2022-08-11 23:01:29 +03:00
parent 11389247c5
commit b1013d8f0f
4 changed files with 86 additions and 19 deletions

View File

@ -301,10 +301,11 @@ public:
size_t size() const { return static_cast<size_t>(document().integer_value(Field::Id::Size)); }
/**
* get the list of references (consisting of both the References and
* In-Reply-To fields), with the oldest first and the direct parent as
* the last one. Note, any reference (message-id) will appear at most
* once, duplicates are filtered out.
* Get the (possibly empty) list of references (consisting of both the
* References and In-Reply-To fields), with the oldest first and the
* direct parent as the last one. Note, any reference (message-id) will
* appear at most once, duplicates and fake-message-id (see impls) are
* filtered out.
*
* @return a vec with the references for this msg.
*/
@ -312,6 +313,17 @@ public:
return document().string_vec_value(Field::Id::References);
}
/**
* Get the thread-id for this message. This is the message-id of the
* oldest-known (grand) parent, or the message-id of this message if
* none.
*
* @return the thread id.
*/
std::string thread_id() const {
return document().string_value(Field::Id::ThreadId);
}
/**
* get the list of tags (ie., X-Label)
*
@ -409,7 +421,6 @@ public:
using Part = MessagePart;
const std::vector<Part>& parts() const;
/**
* Get the path to a cche directory for this message, which
* is useful for temporarily saving attachments

View File

@ -397,22 +397,31 @@ MimeMessage::contacts(Contact::Type ctype) const noexcept
return contacts;
}
/*
* references() returns the concatenation of the References and In-Reply-To
* message-ids (in that order). Duplicates are removed.
*
* The _first_ one in the list determines the thread-id for the message.
*/
std::vector<std::string>
MimeMessage::references() const noexcept
{
constexpr std::array<const char*, 2> ref_headers = {
"References", "In-reply-to",
};
// is ref already in the list?
// is ref already in the list? O(n) but with small n.
auto is_dup = [](auto&& seq, const std::string& ref) {
return seq_some(seq, [&](auto&& str) { return ref == str; });
};
auto is_fake = [](auto&& msgid) {
// this is bit ugly; protonmail injects fake References which
// can otherwise screw up threading.
if (g_str_has_suffix(msgid, "protonmail.internalid"))
return true;
/* ... */
return false;
};
std::vector<std::string> refs;
for (auto&& ref_header: ref_headers) {
for (auto&& ref_header: { "References", "In-reply-to" }) {
auto hdr{header(ref_header)};
if (!hdr)
@ -422,12 +431,9 @@ MimeMessage::references() const noexcept
refs.reserve(refs.size() + g_mime_references_length(mime_refs));
for (auto i = 0; i != g_mime_references_length(mime_refs); ++i) {
const auto msgid{g_mime_references_get_message_id(mime_refs, i)};
if (!msgid || is_dup(refs, msgid))
continue; // invalid or skip dups
refs.emplace_back(msgid);
if (msgid && !is_dup(refs, msgid) && !is_fake(msgid))
refs.emplace_back(msgid);
}
g_mime_references_free(mime_refs);
}

View File

@ -973,7 +973,9 @@ public:
/**
* Get the references for this message (including in-reply-to), in the
* order of older..newer; in-reply-to would be the last one.
* order of older..newer; the first one would the oldest parent, and
* in-reply-to would be the last one (if any). These are de-duplicated,
* and known-fake references removed (see implementation)
*
* @return references.
*/

View File

@ -808,6 +808,52 @@ RU5EOlZFVkVOVA0KRU5EOlZDQUxFTkRBUg0K
}
static void
test_message_references()
{
constexpr auto msgtext =
R"(Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset=utf-8
References: <YuvYh1JbE3v+abd5@kili>
<90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com>
<T4CDWjUrgtI5n4mh1JEdW6RLYzqbPE9-yDrhEVwDM22WX-198fBwcnLd-4_xR1gvsVSHQps9fp_pZevTF0ZmaA==@protonmail.internalid>
To: "Robin Murphy" <robin.murphy@arm.com>
Reply-To: "Dan Carpenter" <dan.carpenter@oracle.com>
From: "Dan Carpenter" <dan.carpenter@oracle.com>
Subject: Re: [PATCH] iommu/omap: fix buffer overflow in debugfs
List-Id: <kernel-janitors.vger.kernel.org>
Date: Fri, 5 Aug 2022 09:37:02 +0300
In-Reply-To: <90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com>
Precedence: bulk
Message-Id: <20220805063702.GH3438@kadam>
On Thu, Aug 04, 2022 at 05:31:39PM +0100, Robin Murphy wrote:
> On 04/08/2022 3:32 pm, Dan Carpenter wrote:
> > There are two issues here:
)";
auto message{Message::make_from_text(
msgtext,
"/home/test/Maildir/inbox/cur/162342449279256.88888_1.evergrey:2,S")};
g_assert_true(!!message);
assert_equal(message->subject(),
"Re: [PATCH] iommu/omap: fix buffer overflow in debugfs");
g_assert_true(message->priority() == Priority::Low);
/*
* "90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com" is seen both in
* references and in-reply-to; in the de-duplication, the first one wins.
*/
std::vector<std::string> expected_refs = {
"YuvYh1JbE3v+abd5@kili",
"90a760c4-6e88-07b4-1f20-8b10414e49aa@arm.com",
/* protonmail.internalid is fake and removed */
// "T4CDWjUrgtI5n4mh1JEdW6RLYzqbPE9-yDrhEVwDM22WX-198fBwcnLd-4_"
// "xR1gvsVSHQps9fp_pZevTF0ZmaA==@protonmail.internalid"
};
assert_equal_seq_str(expected_refs, message->references());
}
static void
test_message_fail ()
@ -850,6 +896,8 @@ main(int argc, char* argv[])
test_message_detect_attachment);
g_test_add_func("/message/message/calendar",
test_message_calendar);
g_test_add_func("/message/message/references",
test_message_references);
g_test_add_func("/message/message/fail",
test_message_fail);
g_test_add_func("/message/message/sanitize-maildir",