mirror of https://github.com/djcb/mu.git
message: refactor/improve attachment heuristic a bit
Also check for X-MS-Has-Attach
This commit is contained in:
parent
39dcd08fbe
commit
9bf580de3d
|
@ -158,3 +158,31 @@ MessagePart::is_encrypted() const noexcept
|
|||
{
|
||||
return mime_object().is_multipart_encrypted();
|
||||
}
|
||||
|
||||
bool /* heuristic */
|
||||
MessagePart::looks_like_attachment() const noexcept
|
||||
{
|
||||
auto matches=[](const MimeContentType& ctype,
|
||||
const std::initializer_list<std::pair<const char*, const char*>>& ctypes) {
|
||||
return std::find_if(ctypes.begin(), ctypes.end(), [&](auto&& item){
|
||||
return ctype.is_type(item.first, item.second); }) != ctypes.end();
|
||||
};
|
||||
|
||||
const auto ctype{mime_object().content_type()};
|
||||
if (!ctype)
|
||||
return false; // no content-type: not an attachment.
|
||||
|
||||
// we consider some parts _not_ to be attachments regardless of disposition
|
||||
if (matches(*ctype,{{"application", "pgp-keys"}}))
|
||||
return false;
|
||||
|
||||
// we consider some parts to be attachments regardless of disposition
|
||||
if (matches(*ctype,{{"image", "*"},
|
||||
{"audio", "*"},
|
||||
{"application", "*"},
|
||||
{"application", "x-patch"}}))
|
||||
return true;
|
||||
|
||||
// otherwise, rely on the disposition
|
||||
return is_attachment();
|
||||
}
|
||||
|
|
|
@ -105,13 +105,23 @@ public:
|
|||
/**
|
||||
* Does this part have an "attachment" disposition? Otherwise it is
|
||||
* "inline". Note that this does *not* map 1:1 to a message's HasAttachment
|
||||
* flag.
|
||||
* flag (which uses looks_like_attachment())
|
||||
*
|
||||
* @return true or false.
|
||||
*/
|
||||
bool is_attachment() const noexcept;
|
||||
|
||||
|
||||
/**
|
||||
* Does this part appear to be an attachment from an end-user's point of
|
||||
* view? This uses some heuristics to guess. Some parts for which
|
||||
* is_attachment() is true may not "really" be attachments, and
|
||||
* vice-versa
|
||||
*
|
||||
* @return true or false.
|
||||
*/
|
||||
bool looks_like_attachment() const noexcept;
|
||||
|
||||
/**
|
||||
* Is this part signed?
|
||||
*
|
||||
|
|
|
@ -340,45 +340,6 @@ get_mailing_list(const MimeMessage& mime_msg)
|
|||
return to_string_opt_gchar(std::move(res));
|
||||
}
|
||||
|
||||
static bool /* heuristic */
|
||||
looks_like_attachment(const MimeObject& parent,
|
||||
const MimePart& part, const MimeContentType& ctype)
|
||||
{
|
||||
constexpr std::array<std::pair<const char*, const char*>, 4> att_types = {{
|
||||
{"image", "*"},
|
||||
{"audio", "*"},
|
||||
{"application", "*"},
|
||||
{"application", "x-patch"}
|
||||
}};
|
||||
|
||||
if (parent) { /* crypto multipart children are not considered attachments */
|
||||
if (const auto parent_ctype{parent.content_type()}; parent_ctype) {
|
||||
if (parent_ctype->is_type("multipart", "signed") ||
|
||||
parent_ctype->is_type("multipart", "encrypted"))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* we also consider patches, images, audio, and non-pgp-signature
|
||||
* application attachments to be attachments... */
|
||||
if (ctype.is_type("*", "pgp-signature"))
|
||||
return false; /* don't consider as a signature */
|
||||
|
||||
if (ctype.is_type("text", "*") &&
|
||||
(ctype.is_type("*", "plain") || ctype.is_type("*", "html")))
|
||||
return false; /* not a signature */
|
||||
|
||||
/* if not one of those special types, consider it any attachment
|
||||
* if it says so */
|
||||
if (part.is_attachment())
|
||||
return true;
|
||||
|
||||
const auto it = seq_find_if(att_types, [&](auto&& item){
|
||||
return ctype.is_type(item.first, item.second);
|
||||
});
|
||||
return it != att_types.cend(); /* if found, it's an attachment */
|
||||
}
|
||||
|
||||
static void
|
||||
append_text(Option<std::string>& str, Option<std::string> app)
|
||||
{
|
||||
|
@ -403,19 +364,38 @@ accumulate_text(const MimePart& part, Message::Private& info,
|
|||
append_text(info.body_html, part.to_string());
|
||||
}
|
||||
|
||||
|
||||
static bool /* heuristic */
|
||||
looks_like_attachment(const MimeObject& parent, const MessagePart& mpart)
|
||||
{
|
||||
if (parent) { /* crypto multipart children are not considered attachments */
|
||||
if (const auto parent_ctype{parent.content_type()}; parent_ctype) {
|
||||
if (parent_ctype->is_type("multipart", "signed") ||
|
||||
parent_ctype->is_type("multipart", "encrypted"))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return mpart.looks_like_attachment();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
process_part(const MimeObject& parent, const MimePart& part,
|
||||
Message::Private& info)
|
||||
Message::Private& info, const MessagePart& mpart)
|
||||
{
|
||||
const auto ctype{part.content_type()};
|
||||
if (!ctype)
|
||||
return;
|
||||
|
||||
if (looks_like_attachment(parent, part, *ctype))
|
||||
// flag as calendar, if not already
|
||||
if (none_of(info.flags & Flags::Calendar) &&
|
||||
ctype->is_type("text", "calendar"))
|
||||
info.flags |= Flags::Calendar;
|
||||
|
||||
// flag as attachment, if not already.
|
||||
if (none_of(info.flags & Flags::HasAttachment) &&
|
||||
looks_like_attachment(parent, mpart))
|
||||
info.flags |= Flags::HasAttachment;
|
||||
|
||||
// if there are text parts, gather.
|
||||
|
@ -499,7 +479,7 @@ handle_object(const MimeObject& parent,
|
|||
info.parts.emplace_back(obj);
|
||||
|
||||
if (obj.is_part())
|
||||
process_part(parent, obj, info);
|
||||
process_part(parent, obj, info, info.parts.back());
|
||||
else if (obj.is_message_part())
|
||||
process_message_part(obj, info);
|
||||
else if (obj.is_multipart_signed())
|
||||
|
@ -553,6 +533,16 @@ process_message(const MimeMessage& mime_msg, const std::string& path,
|
|||
info.mailing_list = get_mailing_list(mime_msg);
|
||||
if (info.mailing_list)
|
||||
info.flags |= Flags::MailingList;
|
||||
|
||||
// Microsoft override; outlook message can tell us directly
|
||||
// whether it has attachments.
|
||||
const auto ms_atthdr{mime_msg.header("X-MS-Has-Attach")};
|
||||
if (ms_atthdr) {
|
||||
if (*ms_atthdr == "yes")
|
||||
info.flags |= Flags::HasAttachment;
|
||||
else
|
||||
info.flags &= ~Flags::HasAttachment;
|
||||
}
|
||||
}
|
||||
|
||||
static Mu::Result<std::string>
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "mu-message.hh"
|
||||
#include "mu-mime-object.hh"
|
||||
#include <glib.h>
|
||||
#include <regex>
|
||||
|
||||
using namespace Mu;
|
||||
|
||||
|
@ -569,7 +570,45 @@ Moi,
|
|||
part.mime_type().value_or("boo").c_str());
|
||||
}
|
||||
|
||||
static void
|
||||
test_message_ms_attach()
|
||||
{
|
||||
const std::string msgtext =
|
||||
R"(Date: Thu, 31 Jul 2008 14:57:25 -0400
|
||||
From: "John Milton" <jm@example.com>
|
||||
Subject: Fere libenter homines id quod volunt credunt
|
||||
To: "Julius Caesar" <jc@example.com>
|
||||
Message-id: <3BE9E6535E3029448670913581E7A1A20D852173@emss35m06.us.lmco.com>
|
||||
X-MS-Has-Attach:
|
||||
MIME-version: 1.0
|
||||
Content-type: text/plain; charset=us-ascii
|
||||
Content-transfer-encoding: 7BIT
|
||||
|
||||
OF Mans First Disobedience, and the Fruit
|
||||
Of that Forbidden Tree, whose mortal tast
|
||||
Brought Death into the World, and all our woe,
|
||||
With loss of Eden, till one greater Man
|
||||
)";
|
||||
|
||||
{
|
||||
auto message{Message::make_from_text(msgtext)};
|
||||
g_assert_true(!!message);
|
||||
g_assert_true(message->flags() == (Flags::None));
|
||||
}
|
||||
|
||||
{
|
||||
const auto text2 = std::regex_replace(
|
||||
msgtext, std::regex{"X-MS-Has-Attach:"},
|
||||
"X-MS-Has-Attach: yes");
|
||||
|
||||
g_message("%s", text2.c_str());
|
||||
|
||||
auto message{Message::make_from_text(text2)};
|
||||
|
||||
g_assert_true(!!message);
|
||||
g_assert_true(message->flags() == (Flags::HasAttachment));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
|
@ -841,6 +880,8 @@ main(int argc, char* argv[])
|
|||
test_message_multipart_mixed_rfc822);
|
||||
g_test_add_func("/message/message/detect-attachment",
|
||||
test_message_detect_attachment);
|
||||
g_test_add_func("/message/message/x-ms-has-attach",
|
||||
test_message_ms_attach);
|
||||
g_test_add_func("/message/message/calendar",
|
||||
test_message_calendar);
|
||||
g_test_add_func("/message/message/fail",
|
||||
|
|
Loading…
Reference in New Issue