message: refactor/improve attachment heuristic a bit

Also check for X-MS-Has-Attach
This commit is contained in:
Dirk-Jan C. Binnema 2022-06-10 00:43:47 +03:00
parent 39dcd08fbe
commit 9bf580de3d
4 changed files with 112 additions and 43 deletions

View File

@ -158,3 +158,31 @@ MessagePart::is_encrypted() const noexcept
{
return mime_object().is_multipart_encrypted();
}
bool /* heuristic */
MessagePart::looks_like_attachment() const noexcept
{
auto matches=[](const MimeContentType& ctype,
const std::initializer_list<std::pair<const char*, const char*>>& ctypes) {
return std::find_if(ctypes.begin(), ctypes.end(), [&](auto&& item){
return ctype.is_type(item.first, item.second); }) != ctypes.end();
};
const auto ctype{mime_object().content_type()};
if (!ctype)
return false; // no content-type: not an attachment.
// we consider some parts _not_ to be attachments regardless of disposition
if (matches(*ctype,{{"application", "pgp-keys"}}))
return false;
// we consider some parts to be attachments regardless of disposition
if (matches(*ctype,{{"image", "*"},
{"audio", "*"},
{"application", "*"},
{"application", "x-patch"}}))
return true;
// otherwise, rely on the disposition
return is_attachment();
}

View File

@ -105,13 +105,23 @@ public:
/**
 * Does this part have an "attachment" disposition? Otherwise it is
 * "inline". Note that this does *not* map 1:1 to a message's HasAttachment
 * flag (which uses looks_like_attachment()).
 *
 * @return true or false.
 */
bool is_attachment() const noexcept;
/**
 * Does this part appear to be an attachment from an end-user's point of
 * view? This uses some heuristics to guess. Some parts for which
 * is_attachment() is true may not "really" be attachments, and
 * vice-versa.
 *
 * @return true if the part looks like an attachment, false otherwise.
 */
bool looks_like_attachment() const noexcept;
/**
* Is this part signed?
*

View File

@ -340,45 +340,6 @@ get_mailing_list(const MimeMessage& mime_msg)
return to_string_opt_gchar(std::move(res));
}
/*
 * Heuristic: decide whether @part (with content-type @ctype, found below
 * @parent) should count as an attachment.
 *
 * The checks run in order and the first one that applies wins:
 *  1. children of multipart/signed or multipart/encrypted: no
 *  2. any pgp-signature subtype: no
 *  3. text/plain and text/html: no
 *  4. an explicit attachment disposition: yes
 *  5. otherwise: yes only if the type is listed in att_types.
 */
static bool /* heuristic */
looks_like_attachment(const MimeObject& parent,
		      const MimePart& part, const MimeContentType& ctype)
{
	/* types that count as attachments even without the disposition */
	constexpr std::array<std::pair<const char*, const char*>, 4> att_types = {{
		{"image", "*"},
		{"audio", "*"},
		{"application", "*"},
		{"application", "x-patch"}
	}};

	/* parts living directly under a crypto multipart never count */
	if (parent) {
		const auto parent_ctype{parent.content_type()};
		if (parent_ctype &&
		    (parent_ctype->is_type("multipart", "signed") ||
		     parent_ctype->is_type("multipart", "encrypted")))
			return false;
	}

	/* a pgp-signature is not an attachment */
	if (ctype.is_type("*", "pgp-signature"))
		return false;

	/* plain-text and html text parts are not attachments either */
	if (ctype.is_type("text", "*") &&
	    (ctype.is_type("*", "plain") || ctype.is_type("*", "html")))
		return false;

	/* beyond those special cases, an attachment disposition settles it */
	if (part.is_attachment())
		return true;

	/* last resort: look the content-type up in the table */
	const auto found = seq_find_if(att_types, [&](auto&& candidate) {
		return ctype.is_type(candidate.first, candidate.second);
	});

	return found != att_types.cend();
}
static void
append_text(Option<std::string>& str, Option<std::string> app)
{
@ -403,19 +364,38 @@ accumulate_text(const MimePart& part, Message::Private& info,
append_text(info.body_html, part.to_string());
}
/*
 * Heuristic: does @mpart, found below @parent, count as an attachment?
 *
 * Direct children of a multipart/signed or multipart/encrypted parent never
 * do; in all other cases the answer is MessagePart::looks_like_attachment().
 */
static bool /* heuristic */
looks_like_attachment(const MimeObject& parent, const MessagePart& mpart)
{
	/* no usable parent: only the part itself can tell */
	if (!parent)
		return mpart.looks_like_attachment();

	const auto parent_ctype{parent.content_type()};
	const bool crypto_child = parent_ctype &&
		(parent_ctype->is_type("multipart", "signed") ||
		 parent_ctype->is_type("multipart", "encrypted"));
	if (crypto_child)
		return false; /* crypto multipart children are not attachments */

	return mpart.looks_like_attachment();
}
static void
process_part(const MimeObject& parent, const MimePart& part,
Message::Private& info)
Message::Private& info, const MessagePart& mpart)
{
const auto ctype{part.content_type()};
if (!ctype)
return;
if (looks_like_attachment(parent, part, *ctype))
// flag as calendar, if not already
if (none_of(info.flags & Flags::Calendar) &&
ctype->is_type("text", "calendar"))
info.flags |= Flags::Calendar;
// flag as attachment, if not already.
if (none_of(info.flags & Flags::HasAttachment) &&
looks_like_attachment(parent, mpart))
info.flags |= Flags::HasAttachment;
// if there are text parts, gather.
@ -499,7 +479,7 @@ handle_object(const MimeObject& parent,
info.parts.emplace_back(obj);
if (obj.is_part())
process_part(parent, obj, info);
process_part(parent, obj, info, info.parts.back());
else if (obj.is_message_part())
process_message_part(obj, info);
else if (obj.is_multipart_signed())
@ -553,6 +533,16 @@ process_message(const MimeMessage& mime_msg, const std::string& path,
info.mailing_list = get_mailing_list(mime_msg);
if (info.mailing_list)
info.flags |= Flags::MailingList;
// Microsoft override; Outlook messages can tell us directly
// whether they have attachments
const auto ms_atthdr{mime_msg.header("X-MS-Has-Attach")};
if (ms_atthdr) {
if (*ms_atthdr == "yes")
info.flags |= Flags::HasAttachment;
else
info.flags &= ~Flags::HasAttachment;
}
}
static Mu::Result<std::string>

View File

@ -20,6 +20,7 @@
#include "mu-message.hh"
#include "mu-mime-object.hh"
#include <glib.h>
#include <regex>
using namespace Mu;
@ -569,7 +570,45 @@ Moi,
part.mime_type().value_or("boo").c_str());
}
/*
 * Check how the X-MS-Has-Attach header influences the message flags: with
 * an empty header value no flags are expected, with the value "yes" the
 * HasAttachment flag is expected.
 */
static void
test_message_ms_attach()
{
	const std::string base_text =
R"(Date: Thu, 31 Jul 2008 14:57:25 -0400
From: "John Milton" <jm@example.com>
Subject: Fere libenter homines id quod volunt credunt
To: "Julius Caesar" <jc@example.com>
Message-id: <3BE9E6535E3029448670913581E7A1A20D852173@emss35m06.us.lmco.com>
X-MS-Has-Attach:
MIME-version: 1.0
Content-type: text/plain; charset=us-ascii
Content-transfer-encoding: 7BIT
OF Mans First Disobedience, and the Fruit
Of that Forbidden Tree, whose mortal tast
Brought Death into the World, and all our woe,
With loss of Eden, till one greater Man
)";

	/* case 1: the header is present but carries no value */
	{
		auto message = Message::make_from_text(base_text);
		g_assert_true(!!message);
		g_assert_true(message->flags() == (Flags::None));
	}

	/* case 2: same message, but with the header value set to "yes" */
	{
		const std::regex empty_header{"X-MS-Has-Attach:"};
		const auto yes_text =
			std::regex_replace(base_text, empty_header,
					   "X-MS-Has-Attach: yes");

		g_message("%s", yes_text.c_str());

		auto message = Message::make_from_text(yes_text);
		g_assert_true(!!message);
		g_assert_true(message->flags() == (Flags::HasAttachment));
	}
}
static void
@ -841,6 +880,8 @@ main(int argc, char* argv[])
test_message_multipart_mixed_rfc822);
g_test_add_func("/message/message/detect-attachment",
test_message_detect_attachment);
g_test_add_func("/message/message/x-ms-has-attach",
test_message_ms_attach);
g_test_add_func("/message/message/calendar",
test_message_calendar);
g_test_add_func("/message/message/fail",