mirror of https://github.com/djcb/mu.git
message: try to detect body text language
Try to detect the language of the e-mail body and make it searchable.
This commit is contained in:
parent
ad64093183
commit
7f2eeb1010
|
@ -165,7 +165,6 @@ test_field_from_name()
|
|||
Field::Id::Bcc);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
test_xapian_term()
|
||||
{
|
||||
|
|
|
@ -52,6 +52,7 @@ struct Field {
|
|||
File, /**< Filename */
|
||||
Flags, /**< Message flags */
|
||||
From, /**< Message sender */
|
||||
Language, /**< Body language */
|
||||
Maildir, /**< Maildir path */
|
||||
MailingList, /**< Mailing list */
|
||||
MessageId, /**< Message Id */
|
||||
|
@ -252,7 +253,6 @@ static constexpr std::array<Field, Field::id_size()>
|
|||
Field::Flag::IncludeInSexp |
|
||||
Field::Flag::IndexableTerm,
|
||||
},
|
||||
|
||||
{
|
||||
Field::Id::Changed,
|
||||
Field::Type::TimeT,
|
||||
|
@ -316,6 +316,17 @@ static constexpr std::array<Field, Field::id_size()>
|
|||
Field::Flag::IncludeInSexp |
|
||||
Field::Flag::IndexableTerm,
|
||||
},
|
||||
{
|
||||
Field::Id::Language,
|
||||
Field::Type::String,
|
||||
"language", "lang",
|
||||
"ISO 639-1 language code for body",
|
||||
"lang:nl",
|
||||
'a',
|
||||
Field::Flag::BooleanTerm |
|
||||
Field::Flag::Value |
|
||||
Field::Flag::IncludeInSexp
|
||||
},
|
||||
{
|
||||
Field::Id::Maildir,
|
||||
Field::Type::String,
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <utils/mu-utils.hh>
|
||||
#include <utils/mu-error.hh>
|
||||
#include <utils/mu-option.hh>
|
||||
#include <utils/mu-lang-detector.hh>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
@ -67,6 +68,8 @@ struct Message::Private {
|
|||
Option<std::string> body_txt;
|
||||
Option<std::string> body_html;
|
||||
Option<std::string> embedded;
|
||||
|
||||
Option<std::string> language; /* body ISO language code */
|
||||
};
|
||||
|
||||
|
||||
|
@ -531,6 +534,14 @@ process_message(const MimeMessage& mime_msg, const std::string& path,
|
|||
info.mailing_list = get_mailing_list(mime_msg);
|
||||
if (info.mailing_list)
|
||||
info.flags |= Flags::MailingList;
|
||||
|
||||
if (info.body_txt) { /* attempt to get the body-language */
|
||||
if (const auto lang{detect_language(info.body_txt.value())}; lang) {
|
||||
info.language = lang->code;
|
||||
g_debug("detected language: %s", lang->code);
|
||||
} else
|
||||
g_debug("could not detect language");
|
||||
}
|
||||
}
|
||||
|
||||
static Mu::Result<std::string>
|
||||
|
@ -586,8 +597,6 @@ fake_message_id(const std::string& path)
|
|||
* based on a field. So we add them here.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
static void
|
||||
doc_add_list_post(Document& doc, const MimeMessage& mime_msg)
|
||||
{
|
||||
|
@ -643,7 +652,7 @@ fill_document(Message::Private& priv)
|
|||
doc_add_reply_to(doc, mime_msg); /* only in sexp */
|
||||
|
||||
field_for_each([&](auto&& field) {
|
||||
/* insist on expliclity handling each */
|
||||
/* insist on explicitly handling each */
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic error "-Wswitch"
|
||||
switch(field.id) {
|
||||
|
@ -652,7 +661,6 @@ fill_document(Message::Private& priv)
|
|||
break;
|
||||
case Field::Id::BodyText:
|
||||
doc.add(field.id, priv.body_txt);
|
||||
|
||||
break;
|
||||
case Field::Id::Cc:
|
||||
doc.add(field.id, mime_msg.contacts(Contact::Type::Cc));
|
||||
|
@ -676,6 +684,9 @@ fill_document(Message::Private& priv)
|
|||
case Field::Id::From:
|
||||
doc.add(field.id, mime_msg.contacts(Contact::Type::From));
|
||||
break;
|
||||
case Field::Id::Language:
|
||||
doc.add(field.id, priv.language);
|
||||
break;
|
||||
case Field::Id::Maildir: /* already */
|
||||
break;
|
||||
case Field::Id::MailingList:
|
||||
|
|
|
@ -110,7 +110,6 @@ public:
|
|||
}
|
||||
/* LCOV_EXCL_STOP */
|
||||
|
||||
|
||||
/**
|
||||
* Construct a message from a string. This is mostly useful for testing.
|
||||
*
|
||||
|
@ -406,8 +405,8 @@ public:
|
|||
const std::vector<Part>& parts() const;
|
||||
|
||||
/**
|
||||
* Get the path to a cche directory for this message, which
|
||||
* is useful for temporarily saving attachments
|
||||
* Get the path to a cache directory for this message, which is useful
|
||||
* for temporarily saving attachments
|
||||
*
|
||||
* @param index optionally, create <cache-path>/<index> instead;
|
||||
* this is useful for having part-specific subdirectories.
|
||||
|
|
Loading…
Reference in New Issue